/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT                 0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_1_ARCT                 0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_2_ARCT                 0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_3_ARCT                 0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_4_ARCT                 0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_5_ARCT                 0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX        0

#define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1

enum ta_ras_gfx_subblock {
	/*CPC*/
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF*/
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG*/
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS*/
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI*/
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges)*/
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0*/
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
	[AMDGPU_RAS_BLOCK__##subblock] = { \
		#subblock, \
		TA_RAS_BLOCK__##subblock, \
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
	}

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

/*
 * Write a GC register through the RLC interface: GRBM index/cntl writes are
 * mirrored into scratch registers and then written directly via MMIO, any
 * other offset is handed to the RLC firmware through the scratch registers
 * and the spare interrupt, then polled for completion.
 */
static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
{
	static void *scratch_reg0;
	static void *scratch_reg1;
	static void *scratch_reg2;
	static void *scratch_reg3;
	static void *spare_int;
	static uint32_t grbm_cntl;
	static uint32_t grbm_idx;

	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
	scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
	scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
	scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;

	if (amdgpu_sriov_runtime(adev)) {
		pr_err("shouldn't call rlcg write register during runtime\n");
		return;
	}

	if (offset == grbm_cntl || offset == grbm_idx) {
		if (offset == grbm_cntl)
			writel(v, scratch_reg2);
		else if (offset == grbm_idx)
			writel(v, scratch_reg3);

		writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
	} else {
		uint32_t i = 0;
		uint32_t retries = 50000;

		writel(v, scratch_reg0);
		writel(offset | 0x80000000, scratch_reg1);
		writel(1, spare_int);
		for (i = 0; i < retries; i++) {
			u32 tmp;

			tmp = readl(scratch_reg1);
			if (!(tmp & 0x80000000))
				break;

			udelay(10);
		}
		if (i >= retries)
			pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
	}

}

static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset,
				u32 v, u32 acc_flags, u32 hwip)
{
	if ((acc_flags & AMDGPU_REGS_RLC) &&
	    amdgpu_sriov_fullaccess(adev)) {
		gfx_v9_0_rlcg_w(adev, offset, v, acc_flags);

		return;
	}

	if (acc_flags & AMDGPU_REGS_NO_KIQ)
		WREG32_NO_KIQ(offset, v);
	else
		WREG32(offset, v);
}

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);

static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				       uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
		PACKET3_SET_RESOURCES_VMID_MASK(0) |
		/* vmid_mask:0* queue_type:0 (KIQ) */
		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				    struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			 /*queue_type: normal compute queue */
			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			 /* alloc format: all_on_one_pipe */
			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			 /* num_queues: must be 1 */
			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      enum amdgpu_unmap_queues_action action,
				      u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      u64 addr,
				      u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
					 uint16_t pasid, uint32_t flush_type,
					 bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case IP_VERSION(9, 2, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case IP_VERSION(9, 4, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case IP_VERSION(9, 4, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case IP_VERSION(9, 3, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need common goldensetting */
	case IP_VERSION(9, 4, 2):
		gfx_v9_4_2_init_golden_registers(adev,
						 adev->smuio.funcs->get_die_id(adev));
		break;
	default:
		break;
	}

	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

/*
 * Basic ring sanity test: write a magic value to a scratch register through
 * the ring and poll until it reads back.
 */
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16,
			  AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
	     (adev->gfx.mec_feature_version < 46) ||
	     (adev->gfx.pfp_fw_version < 0x000000b7) ||
	     (adev->gfx.pfp_feature_version < 46)))
		DRM_WARN_ONCE("CP firmware version too old, please update!");

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 2, 1):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 4, 0):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 2, 2):
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		adev->gfx.me_fw_write_wait = true;
		adev->gfx.mec_fw_write_wait = true;
		break;
	}
}

struct amdgpu_gfxoff_quirk {
	u16 chip_vendor;
	u16 chip_device;
	u16 subsys_vendor;
	u16 subsys_device;
	u8 revision;
};

static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
	{ 0, 0, 0, 0, 0 },
};

static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
{
	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;

	while (p && p->chip_device != 0) {
		if (pdev->vendor == p->chip_vendor &&
		    pdev->device == p->chip_device &&
		    pdev->subsystem_vendor == p->subsys_vendor &&
		    pdev->subsystem_device == p->subsys_device &&
		    pdev->revision == p->revision) {
			return true;
		}
		++p;
	}
	return false;
}

static bool is_raven_kicker(struct amdgpu_device *adev)
{
	if (adev->pm.fw_version >= 0x41e2b)
		return true;
	else
		return false;
}

static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
{
	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
	    (adev->gfx.me_fw_version >= 0x000000a5) &&
	    (adev->gfx.me_feature_version >= 52))
		return true;
	else
		return false;
}

static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
1327 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) || 1328 (adev->apu_flags & AMD_APU_IS_PICASSO)) && 1329 ((!is_raven_kicker(adev) && 1330 adev->gfx.rlc_fw_version < 531) || 1331 (adev->gfx.rlc_feature_version < 1) || 1332 !adev->gfx.rlc.is_rlc_v2_1)) 1333 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1334 1335 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1336 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1337 AMD_PG_SUPPORT_CP | 1338 AMD_PG_SUPPORT_RLC_SMU_HS; 1339 break; 1340 case IP_VERSION(9, 3, 0): 1341 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1342 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1343 AMD_PG_SUPPORT_CP | 1344 AMD_PG_SUPPORT_RLC_SMU_HS; 1345 break; 1346 default: 1347 break; 1348 } 1349 } 1350 1351 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, 1352 const char *chip_name) 1353 { 1354 char fw_name[30]; 1355 int err; 1356 struct amdgpu_firmware_info *info = NULL; 1357 const struct common_firmware_header *header = NULL; 1358 const struct gfx_firmware_header_v1_0 *cp_hdr; 1359 1360 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 1361 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1362 if (err) 1363 goto out; 1364 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 1365 if (err) 1366 goto out; 1367 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1368 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1369 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1370 1371 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 1372 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1373 if (err) 1374 goto out; 1375 err = amdgpu_ucode_validate(adev->gfx.me_fw); 1376 if (err) 1377 goto out; 1378 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1379 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1380 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1381 1382 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 1383 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1384 if (err) 1385 goto out; 1386 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 1387 if (err) 1388 goto out; 1389 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1390 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1391 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1392 1393 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1394 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 1395 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 1396 info->fw = adev->gfx.pfp_fw; 1397 header = (const struct common_firmware_header *)info->fw->data; 1398 adev->firmware.fw_size += 1399 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1400 1401 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1402 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1403 info->fw = adev->gfx.me_fw; 1404 header = (const struct common_firmware_header *)info->fw->data; 1405 adev->firmware.fw_size += 1406 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1407 1408 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1409 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1410 info->fw = adev->gfx.ce_fw; 1411 header = (const struct common_firmware_header *)info->fw->data; 1412 adev->firmware.fw_size += 1413 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1414 } 1415 1416 out: 1417 if (err) { 1418 dev_err(adev->dev, 1419 "gfx9: Failed to load firmware \"%s\"\n", 1420 
fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
	}
	return err;
}

static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
					const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	/*
	 * For Picasso on an AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * How to tell them apart:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 *          or revision >= 0xD8 && revision <= 0xDF
	 * otherwise it is PCO FP5.
	 */
	if (!strcmp(chip_name, "picasso") &&
	    (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
	     ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") &&
		 (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		 (smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APU, so the SMU version can be
		 * queried directly to pick the kicker RLC firmware.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i <
(adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) 1507 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 1508 1509 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 1510 1511 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1512 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 1513 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) 1514 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 1515 1516 if (adev->gfx.rlc.is_rlc_v2_1) 1517 gfx_v9_0_init_rlc_ext_microcode(adev); 1518 1519 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1520 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1521 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1522 info->fw = adev->gfx.rlc_fw; 1523 header = (const struct common_firmware_header *)info->fw->data; 1524 adev->firmware.fw_size += 1525 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1526 1527 if (adev->gfx.rlc.is_rlc_v2_1 && 1528 adev->gfx.rlc.save_restore_list_cntl_size_bytes && 1529 adev->gfx.rlc.save_restore_list_gpm_size_bytes && 1530 adev->gfx.rlc.save_restore_list_srm_size_bytes) { 1531 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; 1532 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; 1533 info->fw = adev->gfx.rlc_fw; 1534 adev->firmware.fw_size += 1535 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); 1536 1537 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; 1538 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; 1539 info->fw = adev->gfx.rlc_fw; 1540 adev->firmware.fw_size += 1541 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); 1542 1543 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; 1544 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; 1545 info->fw = adev->gfx.rlc_fw; 1546 adev->firmware.fw_size += 1547 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); 1548 } 1549 } 1550 1551 out: 1552 if (err) { 1553 dev_err(adev->dev, 1554 "gfx9: Failed to load firmware \"%s\"\n", 1555 fw_name); 1556 release_firmware(adev->gfx.rlc_fw); 1557 adev->gfx.rlc_fw = NULL; 1558 } 1559 return err; 1560 } 1561 1562 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) 1563 { 1564 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) || 1565 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || 1566 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) 1567 return false; 1568 1569 return true; 1570 } 1571 1572 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, 1573 const char *chip_name) 1574 { 1575 char fw_name[30]; 1576 int err; 1577 struct amdgpu_firmware_info *info = NULL; 1578 const struct common_firmware_header *header = NULL; 1579 const struct gfx_firmware_header_v1_0 *cp_hdr; 1580 1581 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1582 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1583 if (err) 1584 goto out; 1585 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 1586 if (err) 1587 goto out; 1588 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1589 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1590 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1591 1592 1593 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1594 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1595 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1596 if (!err) { 1597 err = 
amdgpu_ucode_validate(adev->gfx.mec2_fw); 1598 if (err) 1599 goto out; 1600 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1601 adev->gfx.mec2_fw->data; 1602 adev->gfx.mec2_fw_version = 1603 le32_to_cpu(cp_hdr->header.ucode_version); 1604 adev->gfx.mec2_feature_version = 1605 le32_to_cpu(cp_hdr->ucode_feature_version); 1606 } else { 1607 err = 0; 1608 adev->gfx.mec2_fw = NULL; 1609 } 1610 } else { 1611 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version; 1612 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version; 1613 } 1614 1615 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1616 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1617 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1618 info->fw = adev->gfx.mec_fw; 1619 header = (const struct common_firmware_header *)info->fw->data; 1620 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1621 adev->firmware.fw_size += 1622 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1623 1624 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; 1625 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; 1626 info->fw = adev->gfx.mec_fw; 1627 adev->firmware.fw_size += 1628 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1629 1630 if (adev->gfx.mec2_fw) { 1631 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1632 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1633 info->fw = adev->gfx.mec2_fw; 1634 header = (const struct common_firmware_header *)info->fw->data; 1635 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1636 adev->firmware.fw_size += 1637 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1638 1639 /* TODO: Determine if MEC2 JT FW loading can be removed 1640 for all GFX V9 asic and above */ 1641 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1642 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; 1643 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; 1644 info->fw = adev->gfx.mec2_fw; 1645 adev->firmware.fw_size += 1646 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, 1647 PAGE_SIZE); 1648 } 1649 } 1650 } 1651 1652 out: 1653 gfx_v9_0_check_if_need_gfxoff(adev); 1654 gfx_v9_0_check_fw_write_wait(adev); 1655 if (err) { 1656 dev_err(adev->dev, 1657 "gfx9: Failed to load firmware \"%s\"\n", 1658 fw_name); 1659 release_firmware(adev->gfx.mec_fw); 1660 adev->gfx.mec_fw = NULL; 1661 release_firmware(adev->gfx.mec2_fw); 1662 adev->gfx.mec2_fw = NULL; 1663 } 1664 return err; 1665 } 1666 1667 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1668 { 1669 const char *chip_name; 1670 int r; 1671 1672 DRM_DEBUG("\n"); 1673 1674 switch (adev->ip_versions[GC_HWIP][0]) { 1675 case IP_VERSION(9, 0, 1): 1676 chip_name = "vega10"; 1677 break; 1678 case IP_VERSION(9, 2, 1): 1679 chip_name = "vega12"; 1680 break; 1681 case IP_VERSION(9, 4, 0): 1682 chip_name = "vega20"; 1683 break; 1684 case IP_VERSION(9, 2, 2): 1685 case IP_VERSION(9, 1, 0): 1686 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 1687 chip_name = "raven2"; 1688 else if (adev->apu_flags & AMD_APU_IS_PICASSO) 1689 chip_name = "picasso"; 1690 else 1691 chip_name = "raven"; 1692 break; 1693 case IP_VERSION(9, 4, 1): 1694 chip_name = "arcturus"; 1695 break; 1696 case IP_VERSION(9, 3, 0): 1697 if (adev->apu_flags & AMD_APU_IS_RENOIR) 1698 chip_name = "renoir"; 1699 else 1700 chip_name = "green_sardine"; 1701 break; 1702 case IP_VERSION(9, 4, 2): 1703 chip_name = "aldebaran"; 1704 break; 1705 default: 1706 BUG(); 1707 } 1708 1709 /* No CPG in Arcturus */ 1710 if 
(adev->gfx.num_gfx_rings) { 1711 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); 1712 if (r) 1713 return r; 1714 } 1715 1716 r = gfx_v9_0_init_rlc_microcode(adev, chip_name); 1717 if (r) 1718 return r; 1719 1720 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); 1721 if (r) 1722 return r; 1723 1724 return r; 1725 } 1726 1727 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1728 { 1729 u32 count = 0; 1730 const struct cs_section_def *sect = NULL; 1731 const struct cs_extent_def *ext = NULL; 1732 1733 /* begin clear state */ 1734 count += 2; 1735 /* context control state */ 1736 count += 3; 1737 1738 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1739 for (ext = sect->section; ext->extent != NULL; ++ext) { 1740 if (sect->id == SECT_CONTEXT) 1741 count += 2 + ext->reg_count; 1742 else 1743 return 0; 1744 } 1745 } 1746 1747 /* end clear state */ 1748 count += 2; 1749 /* clear state */ 1750 count += 2; 1751 1752 return count; 1753 } 1754 1755 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1756 volatile u32 *buffer) 1757 { 1758 u32 count = 0, i; 1759 const struct cs_section_def *sect = NULL; 1760 const struct cs_extent_def *ext = NULL; 1761 1762 if (adev->gfx.rlc.cs_data == NULL) 1763 return; 1764 if (buffer == NULL) 1765 return; 1766 1767 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1768 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1769 1770 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1771 buffer[count++] = cpu_to_le32(0x80000000); 1772 buffer[count++] = cpu_to_le32(0x80000000); 1773 1774 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1775 for (ext = sect->section; ext->extent != NULL; ++ext) { 1776 if (sect->id == SECT_CONTEXT) { 1777 buffer[count++] = 1778 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1779 buffer[count++] = cpu_to_le32(ext->reg_index - 1780 PACKET3_SET_CONTEXT_REG_START); 1781 for (i = 0; i < ext->reg_count; i++) 1782 buffer[count++] = cpu_to_le32(ext->extent[i]); 1783 } else { 1784 return; 1785 } 1786 } 1787 } 1788 1789 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1790 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1791 1792 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1793 buffer[count++] = cpu_to_le32(0); 1794 } 1795 1796 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1797 { 1798 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1799 uint32_t pg_always_on_cu_num = 2; 1800 uint32_t always_on_cu_num; 1801 uint32_t i, j, k; 1802 uint32_t mask, cu_bitmap, counter; 1803 1804 if (adev->flags & AMD_IS_APU) 1805 always_on_cu_num = 4; 1806 else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1)) 1807 always_on_cu_num = 8; 1808 else 1809 always_on_cu_num = 12; 1810 1811 mutex_lock(&adev->grbm_idx_mutex); 1812 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1813 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1814 mask = 1; 1815 cu_bitmap = 0; 1816 counter = 0; 1817 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1818 1819 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1820 if (cu_info->bitmap[i][j] & mask) { 1821 if (counter == pg_always_on_cu_num) 1822 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1823 if (counter < always_on_cu_num) 1824 cu_bitmap |= mask; 1825 else 1826 break; 1827 counter++; 1828 } 1829 mask <<= 1; 1830 } 1831 1832 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 
1833 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1834 } 1835 } 1836 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1837 mutex_unlock(&adev->grbm_idx_mutex); 1838 } 1839 1840 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1841 { 1842 uint32_t data; 1843 1844 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1845 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1846 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1847 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1848 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1849 1850 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1851 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1852 1853 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1854 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1855 1856 mutex_lock(&adev->grbm_idx_mutex); 1857 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1858 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1859 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1860 1861 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1862 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1863 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1864 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1865 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1866 1867 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1868 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1869 data &= 0x0000FFFF; 1870 data |= 0x00C00000; 1871 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1872 1873 /* 1874 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1875 * programmed in gfx_v9_0_init_always_on_cu_mask() 1876 */ 1877 1878 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1879 * but used for RLC_LB_CNTL configuration */ 1880 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1881 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1882 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1883 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1884 mutex_unlock(&adev->grbm_idx_mutex); 1885 1886 gfx_v9_0_init_always_on_cu_mask(adev); 1887 } 1888 1889 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1890 { 1891 uint32_t data; 1892 1893 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1894 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1895 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1896 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1897 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1898 1899 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1900 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1901 1902 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1903 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1904 1905 mutex_lock(&adev->grbm_idx_mutex); 1906 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1907 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1908 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1909 1910 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1911 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1912 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1913 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1914 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1915 1916 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1917 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1918 data &= 0x0000FFFF; 1919 data |= 0x00C00000; 
1920 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1921 1922 /* 1923 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1924 * programmed in gfx_v9_0_init_always_on_cu_mask() 1925 */ 1926 1927 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1928 * but used for RLC_LB_CNTL configuration */ 1929 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1930 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1931 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1932 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1933 mutex_unlock(&adev->grbm_idx_mutex); 1934 1935 gfx_v9_0_init_always_on_cu_mask(adev); 1936 } 1937 1938 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1939 { 1940 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); 1941 } 1942 1943 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1944 { 1945 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) 1946 return 5; 1947 else 1948 return 4; 1949 } 1950 1951 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1952 { 1953 const struct cs_section_def *cs_data; 1954 int r; 1955 1956 adev->gfx.rlc.cs_data = gfx9_cs_data; 1957 1958 cs_data = adev->gfx.rlc.cs_data; 1959 1960 if (cs_data) { 1961 /* init clear state block */ 1962 r = amdgpu_gfx_rlc_init_csb(adev); 1963 if (r) 1964 return r; 1965 } 1966 1967 if (adev->flags & AMD_IS_APU) { 1968 /* TODO: double check the cp_table_size for RV */ 1969 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1970 r = amdgpu_gfx_rlc_init_cpt(adev); 1971 if (r) 1972 return r; 1973 } 1974 1975 switch (adev->ip_versions[GC_HWIP][0]) { 1976 case IP_VERSION(9, 2, 2): 1977 case IP_VERSION(9, 1, 0): 1978 gfx_v9_0_init_lbpw(adev); 1979 break; 1980 case IP_VERSION(9, 4, 0): 1981 gfx_v9_4_init_lbpw(adev); 1982 break; 1983 default: 1984 break; 1985 } 1986 1987 /* init spm vmid with 0xf */ 1988 if (adev->gfx.rlc.funcs->update_spm_vmid) 1989 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); 1990 1991 return 0; 1992 } 1993 1994 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1995 { 1996 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1997 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1998 } 1999 2000 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 2001 { 2002 int r; 2003 u32 *hpd; 2004 const __le32 *fw_data; 2005 unsigned fw_size; 2006 u32 *fw; 2007 size_t mec_hpd_size; 2008 2009 const struct gfx_firmware_header_v1_0 *mec_hdr; 2010 2011 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 2012 2013 /* take ownership of the relevant compute queues */ 2014 amdgpu_gfx_compute_queue_acquire(adev); 2015 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 2016 if (mec_hpd_size) { 2017 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 2018 AMDGPU_GEM_DOMAIN_VRAM, 2019 &adev->gfx.mec.hpd_eop_obj, 2020 &adev->gfx.mec.hpd_eop_gpu_addr, 2021 (void **)&hpd); 2022 if (r) { 2023 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 2024 gfx_v9_0_mec_fini(adev); 2025 return r; 2026 } 2027 2028 memset(hpd, 0, mec_hpd_size); 2029 2030 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 2031 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 2032 } 2033 2034 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 2035 2036 fw_data = (const __le32 *) 2037 (adev->gfx.mec_fw->data + 2038 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 2039 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 2040 2041 r = 
amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 2042 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2043 &adev->gfx.mec.mec_fw_obj, 2044 &adev->gfx.mec.mec_fw_gpu_addr, 2045 (void **)&fw); 2046 if (r) { 2047 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 2048 gfx_v9_0_mec_fini(adev); 2049 return r; 2050 } 2051 2052 memcpy(fw, fw_data, fw_size); 2053 2054 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 2055 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 2056 2057 return 0; 2058 } 2059 2060 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 2061 { 2062 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 2063 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 2064 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 2065 (address << SQ_IND_INDEX__INDEX__SHIFT) | 2066 (SQ_IND_INDEX__FORCE_READ_MASK)); 2067 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 2068 } 2069 2070 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 2071 uint32_t wave, uint32_t thread, 2072 uint32_t regno, uint32_t num, uint32_t *out) 2073 { 2074 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 2075 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 2076 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 2077 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 2078 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 2079 (SQ_IND_INDEX__FORCE_READ_MASK) | 2080 (SQ_IND_INDEX__AUTO_INCR_MASK)); 2081 while (num--) 2082 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 2083 } 2084 2085 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 2086 { 2087 /* type 1 wave data */ 2088 dst[(*no_fields)++] = 1; 2089 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 2090 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 2091 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 2092 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 2093 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 2094 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 2095 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 2096 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 2097 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 2098 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 2099 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 2100 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 2101 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 2102 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 2103 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); 2104 } 2105 2106 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 2107 uint32_t wave, uint32_t start, 2108 uint32_t size, uint32_t *dst) 2109 { 2110 wave_read_regs( 2111 adev, simd, wave, 0, 2112 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 2113 } 2114 2115 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 2116 uint32_t wave, uint32_t thread, 2117 uint32_t start, uint32_t size, 2118 uint32_t *dst) 2119 { 2120 wave_read_regs( 2121 adev, simd, wave, thread, 2122 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 2123 } 2124 2125 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 2126 u32 me, u32 pipe, u32 q, u32 vm) 2127 { 2128 
soc15_grbm_select(adev, me, pipe, q, vm); 2129 } 2130 2131 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 2132 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 2133 .select_se_sh = &gfx_v9_0_select_se_sh, 2134 .read_wave_data = &gfx_v9_0_read_wave_data, 2135 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 2136 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 2137 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 2138 }; 2139 2140 static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = { 2141 .ras_late_init = amdgpu_gfx_ras_late_init, 2142 .ras_fini = amdgpu_gfx_ras_fini, 2143 .ras_error_inject = &gfx_v9_0_ras_error_inject, 2144 .query_ras_error_count = &gfx_v9_0_query_ras_error_count, 2145 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, 2146 }; 2147 2148 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 2149 { 2150 u32 gb_addr_config; 2151 int err; 2152 2153 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 2154 2155 switch (adev->ip_versions[GC_HWIP][0]) { 2156 case IP_VERSION(9, 0, 1): 2157 adev->gfx.config.max_hw_contexts = 8; 2158 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2159 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2160 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2161 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2162 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 2163 break; 2164 case IP_VERSION(9, 2, 1): 2165 adev->gfx.config.max_hw_contexts = 8; 2166 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2167 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2168 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2169 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2170 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 2171 DRM_INFO("fix gfx.config for vega12\n"); 2172 break; 2173 case IP_VERSION(9, 4, 0): 2174 adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs; 2175 adev->gfx.config.max_hw_contexts = 8; 2176 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2177 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2178 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2179 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2180 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2181 gb_addr_config &= ~0xf3e777ff; 2182 gb_addr_config |= 0x22014042; 2183 /* check vbios table if gpu info is not available */ 2184 err = amdgpu_atomfirmware_get_gfx_info(adev); 2185 if (err) 2186 return err; 2187 break; 2188 case IP_VERSION(9, 2, 2): 2189 case IP_VERSION(9, 1, 0): 2190 adev->gfx.config.max_hw_contexts = 8; 2191 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2192 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2193 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2194 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2195 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 2196 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 2197 else 2198 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 2199 break; 2200 case IP_VERSION(9, 4, 1): 2201 adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs; 2202 adev->gfx.config.max_hw_contexts = 8; 2203 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2204 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2205 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2206 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2207 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2208 gb_addr_config &= ~0xf3e777ff; 2209 gb_addr_config |= 0x22014042; 2210 break; 2211 case IP_VERSION(9, 3, 0): 2212 adev->gfx.config.max_hw_contexts = 8; 2213 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2214 
adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2215 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 2216 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2217 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2218 gb_addr_config &= ~0xf3e777ff; 2219 gb_addr_config |= 0x22010042; 2220 break; 2221 case IP_VERSION(9, 4, 2): 2222 adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs; 2223 adev->gfx.config.max_hw_contexts = 8; 2224 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2225 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2226 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2227 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2228 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2229 gb_addr_config &= ~0xf3e777ff; 2230 gb_addr_config |= 0x22014042; 2231 /* check vbios table if gpu info is not available */ 2232 err = amdgpu_atomfirmware_get_gfx_info(adev); 2233 if (err) 2234 return err; 2235 break; 2236 default: 2237 BUG(); 2238 break; 2239 } 2240 2241 adev->gfx.config.gb_addr_config = gb_addr_config; 2242 2243 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 2244 REG_GET_FIELD( 2245 adev->gfx.config.gb_addr_config, 2246 GB_ADDR_CONFIG, 2247 NUM_PIPES); 2248 2249 adev->gfx.config.max_tile_pipes = 2250 adev->gfx.config.gb_addr_config_fields.num_pipes; 2251 2252 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 2253 REG_GET_FIELD( 2254 adev->gfx.config.gb_addr_config, 2255 GB_ADDR_CONFIG, 2256 NUM_BANKS); 2257 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 2258 REG_GET_FIELD( 2259 adev->gfx.config.gb_addr_config, 2260 GB_ADDR_CONFIG, 2261 MAX_COMPRESSED_FRAGS); 2262 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 2263 REG_GET_FIELD( 2264 adev->gfx.config.gb_addr_config, 2265 GB_ADDR_CONFIG, 2266 NUM_RB_PER_SE); 2267 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 2268 REG_GET_FIELD( 2269 adev->gfx.config.gb_addr_config, 2270 GB_ADDR_CONFIG, 2271 NUM_SHADER_ENGINES); 2272 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 2273 REG_GET_FIELD( 2274 adev->gfx.config.gb_addr_config, 2275 GB_ADDR_CONFIG, 2276 PIPE_INTERLEAVE_SIZE)); 2277 2278 return 0; 2279 } 2280 2281 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2282 int mec, int pipe, int queue) 2283 { 2284 unsigned irq_type; 2285 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2286 unsigned int hw_prio; 2287 2288 ring = &adev->gfx.compute_ring[ring_id]; 2289 2290 /* mec0 is me1 */ 2291 ring->me = mec + 1; 2292 ring->pipe = pipe; 2293 ring->queue = queue; 2294 2295 ring->ring_obj = NULL; 2296 ring->use_doorbell = true; 2297 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2298 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2299 + (ring_id * GFX9_MEC_HPD_SIZE); 2300 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2301 2302 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2303 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2304 + ring->pipe; 2305 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
2306 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 2307 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2308 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 2309 hw_prio, NULL); 2310 } 2311 2312 static int gfx_v9_0_sw_init(void *handle) 2313 { 2314 int i, j, k, r, ring_id; 2315 struct amdgpu_ring *ring; 2316 struct amdgpu_kiq *kiq; 2317 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2318 2319 switch (adev->ip_versions[GC_HWIP][0]) { 2320 case IP_VERSION(9, 0, 1): 2321 case IP_VERSION(9, 2, 1): 2322 case IP_VERSION(9, 4, 0): 2323 case IP_VERSION(9, 2, 2): 2324 case IP_VERSION(9, 1, 0): 2325 case IP_VERSION(9, 4, 1): 2326 case IP_VERSION(9, 3, 0): 2327 case IP_VERSION(9, 4, 2): 2328 adev->gfx.mec.num_mec = 2; 2329 break; 2330 default: 2331 adev->gfx.mec.num_mec = 1; 2332 break; 2333 } 2334 2335 adev->gfx.mec.num_pipe_per_mec = 4; 2336 adev->gfx.mec.num_queue_per_pipe = 8; 2337 2338 /* EOP Event */ 2339 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2340 if (r) 2341 return r; 2342 2343 /* Privileged reg */ 2344 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2345 &adev->gfx.priv_reg_irq); 2346 if (r) 2347 return r; 2348 2349 /* Privileged inst */ 2350 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2351 &adev->gfx.priv_inst_irq); 2352 if (r) 2353 return r; 2354 2355 /* ECC error */ 2356 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2357 &adev->gfx.cp_ecc_error_irq); 2358 if (r) 2359 return r; 2360 2361 /* FUE error */ 2362 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2363 &adev->gfx.cp_ecc_error_irq); 2364 if (r) 2365 return r; 2366 2367 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2368 2369 gfx_v9_0_scratch_init(adev); 2370 2371 r = gfx_v9_0_init_microcode(adev); 2372 if (r) { 2373 DRM_ERROR("Failed to load gfx firmware!\n"); 2374 return r; 2375 } 2376 2377 r = adev->gfx.rlc.funcs->init(adev); 2378 if (r) { 2379 DRM_ERROR("Failed to init rlc BOs!\n"); 2380 return r; 2381 } 2382 2383 r = gfx_v9_0_mec_init(adev); 2384 if (r) { 2385 DRM_ERROR("Failed to init MEC BOs!\n"); 2386 return r; 2387 } 2388 2389 /* set up the gfx ring */ 2390 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2391 ring = &adev->gfx.gfx_ring[i]; 2392 ring->ring_obj = NULL; 2393 if (!i) 2394 sprintf(ring->name, "gfx"); 2395 else 2396 sprintf(ring->name, "gfx_%d", i); 2397 ring->use_doorbell = true; 2398 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2399 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2400 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, 2401 AMDGPU_RING_PRIO_DEFAULT, NULL); 2402 if (r) 2403 return r; 2404 } 2405 2406 /* set up the compute queues - allocate horizontally across pipes */ 2407 ring_id = 0; 2408 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2409 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2410 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2411 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2412 continue; 2413 2414 r = gfx_v9_0_compute_ring_init(adev, 2415 ring_id, 2416 i, k, j); 2417 if (r) 2418 return r; 2419 2420 ring_id++; 2421 } 2422 } 2423 } 2424 2425 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 2426 if (r) { 2427 DRM_ERROR("Failed to init KIQ BOs!\n"); 2428 return r; 2429 } 2430 2431 kiq = &adev->gfx.kiq; 2432 r = amdgpu_gfx_kiq_init_ring(adev, 
&kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v9_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}

static int gfx_v9_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (adev->gfx.ras_funcs &&
	    adev->gfx.ras_funcs->ras_fini)
		adev->gfx.ras_funcs->ras_fini(adev);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v9_0_mec_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);
	if (adev->flags & AMD_IS_APU) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				      &adev->gfx.rlc.cp_table_gpu_addr,
				      (void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v9_0_free_microcode(adev);

	return 0;
}

static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO */
}

void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
			   u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
}

static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);

	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}

static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v9_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}

#define DEFAULT_SH_MEM_BASES	(0x6000)
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:     0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:   0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

	mutex_lock(&adev->srbm_mutex);
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/*
	 * Initialize all compute VMIDs to have no GDS, GWS, or OA
	 * access. These should be enabled by FW for target VMIDs.
	 */
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
	}
}

static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
{
	int vmid;

	/*
	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
	 * the driver can enable them for graphics. VMID0 should maintain
	 * access so that HWS firmware can save/restore entries.
2600 */ 2601 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { 2602 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2603 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2604 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2605 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2606 } 2607 } 2608 2609 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) 2610 { 2611 uint32_t tmp; 2612 2613 switch (adev->ip_versions[GC_HWIP][0]) { 2614 case IP_VERSION(9, 4, 1): 2615 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); 2616 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, 2617 DISABLE_BARRIER_WAITCNT, 1); 2618 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); 2619 break; 2620 default: 2621 break; 2622 } 2623 } 2624 2625 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2626 { 2627 u32 tmp; 2628 int i; 2629 2630 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2631 2632 gfx_v9_0_tiling_mode_table_init(adev); 2633 2634 gfx_v9_0_setup_rb(adev); 2635 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2636 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2637 2638 /* XXX SH_MEM regs */ 2639 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2640 mutex_lock(&adev->srbm_mutex); 2641 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 2642 soc15_grbm_select(adev, 0, 0, 0, i); 2643 /* CP and shaders */ 2644 if (i == 0) { 2645 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2646 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2647 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2648 !!adev->gmc.noretry); 2649 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2650 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2651 } else { 2652 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2653 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2654 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2655 !!adev->gmc.noretry); 2656 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2657 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2658 (adev->gmc.private_aperture_start >> 48)); 2659 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2660 (adev->gmc.shared_aperture_start >> 48)); 2661 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2662 } 2663 } 2664 soc15_grbm_select(adev, 0, 0, 0, 0); 2665 2666 mutex_unlock(&adev->srbm_mutex); 2667 2668 gfx_v9_0_init_compute_vmid(adev); 2669 gfx_v9_0_init_gds_vmid(adev); 2670 gfx_v9_0_init_sq_config(adev); 2671 } 2672 2673 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2674 { 2675 u32 i, j, k; 2676 u32 mask; 2677 2678 mutex_lock(&adev->grbm_idx_mutex); 2679 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2680 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2681 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2682 for (k = 0; k < adev->usec_timeout; k++) { 2683 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2684 break; 2685 udelay(1); 2686 } 2687 if (k == adev->usec_timeout) { 2688 gfx_v9_0_select_se_sh(adev, 0xffffffff, 2689 0xffffffff, 0xffffffff); 2690 mutex_unlock(&adev->grbm_idx_mutex); 2691 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2692 i, j); 2693 return; 2694 } 2695 } 2696 } 2697 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2698 mutex_unlock(&adev->grbm_idx_mutex); 2699 2700 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2701 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2702 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2703 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2704 for (k = 0; k < adev->usec_timeout; k++) { 2705 
if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2706 break; 2707 udelay(1); 2708 } 2709 } 2710 2711 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2712 bool enable) 2713 { 2714 u32 tmp; 2715 2716 /* These interrupts should be enabled to drive DS clock */ 2717 2718 tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2719 2720 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2721 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2722 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2723 if(adev->gfx.num_gfx_rings) 2724 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2725 2726 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2727 } 2728 2729 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2730 { 2731 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2732 /* csib */ 2733 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2734 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2735 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2736 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2737 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2738 adev->gfx.rlc.clear_state_size); 2739 } 2740 2741 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2742 int indirect_offset, 2743 int list_size, 2744 int *unique_indirect_regs, 2745 int unique_indirect_reg_count, 2746 int *indirect_start_offsets, 2747 int *indirect_start_offsets_count, 2748 int max_start_offsets_count) 2749 { 2750 int idx; 2751 2752 for (; indirect_offset < list_size; indirect_offset++) { 2753 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2754 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2755 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2756 2757 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2758 indirect_offset += 2; 2759 2760 /* look for the matching indice */ 2761 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2762 if (unique_indirect_regs[idx] == 2763 register_list_format[indirect_offset] || 2764 !unique_indirect_regs[idx]) 2765 break; 2766 } 2767 2768 BUG_ON(idx >= unique_indirect_reg_count); 2769 2770 if (!unique_indirect_regs[idx]) 2771 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2772 2773 indirect_offset++; 2774 } 2775 } 2776 } 2777 2778 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2779 { 2780 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2781 int unique_indirect_reg_count = 0; 2782 2783 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2784 int indirect_start_offsets_count = 0; 2785 2786 int list_size = 0; 2787 int i = 0, j = 0; 2788 u32 tmp = 0; 2789 2790 u32 *register_list_format = 2791 kmemdup(adev->gfx.rlc.register_list_format, 2792 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2793 if (!register_list_format) 2794 return -ENOMEM; 2795 2796 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2797 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2798 gfx_v9_1_parse_ind_reg_list(register_list_format, 2799 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2800 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2801 unique_indirect_regs, 2802 unique_indirect_reg_count, 2803 indirect_start_offsets, 2804 &indirect_start_offsets_count, 2805 
ARRAY_SIZE(indirect_start_offsets)); 2806 2807 /* enable auto inc in case it is disabled */ 2808 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2809 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2810 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2811 2812 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2813 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2814 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2815 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2816 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2817 adev->gfx.rlc.register_restore[i]); 2818 2819 /* load indirect register */ 2820 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2821 adev->gfx.rlc.reg_list_format_start); 2822 2823 /* direct register portion */ 2824 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2825 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2826 register_list_format[i]); 2827 2828 /* indirect register portion */ 2829 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2830 if (register_list_format[i] == 0xFFFFFFFF) { 2831 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2832 continue; 2833 } 2834 2835 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2836 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2837 2838 for (j = 0; j < unique_indirect_reg_count; j++) { 2839 if (register_list_format[i] == unique_indirect_regs[j]) { 2840 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2841 break; 2842 } 2843 } 2844 2845 BUG_ON(j >= unique_indirect_reg_count); 2846 2847 i++; 2848 } 2849 2850 /* set save/restore list size */ 2851 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2852 list_size = list_size >> 1; 2853 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2854 adev->gfx.rlc.reg_restore_list_size); 2855 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2856 2857 /* write the starting offsets to RLC scratch ram */ 2858 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2859 adev->gfx.rlc.starting_offsets_start); 2860 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2861 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2862 indirect_start_offsets[i]); 2863 2864 /* load unique indirect regs*/ 2865 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2866 if (unique_indirect_regs[i] != 0) { 2867 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2868 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2869 unique_indirect_regs[i] & 0x3FFFF); 2870 2871 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2872 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2873 unique_indirect_regs[i] >> 20); 2874 } 2875 } 2876 2877 kfree(register_list_format); 2878 return 0; 2879 } 2880 2881 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2882 { 2883 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2884 } 2885 2886 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2887 bool enable) 2888 { 2889 uint32_t data = 0; 2890 uint32_t default_data = 0; 2891 2892 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2893 if (enable) { 2894 /* enable GFXIP control over CGPG */ 2895 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2896 if(default_data != data) 2897 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2898 2899 /* update status */ 2900 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2901 data |= 
(2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
	} else {
		/* restore GFXIP control over CGPG */
		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
	}
}

static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
{
	uint32_t data = 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		/* init IDLE_POLL_COUNT = 60 */
		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);

		/* init RLC PG Delay */
		data = 0;
		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;

		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
			pwr_10_0_gfxip_control_over_cgpg(adev, true);
	}
}

static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						       bool enable)
{
	uint32_t data = 0;
	uint32_t default_data = 0;

	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
	data = REG_SET_FIELD(data, RLC_PG_CNTL,
			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
			     enable ? 1 : 0);
	if (default_data != data)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
}

static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
							 bool enable)
{
	uint32_t data = 0;
	uint32_t default_data = 0;

	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
	data = REG_SET_FIELD(data, RLC_PG_CNTL,
			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
			     enable ? 1 : 0);
	if (default_data != data)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
}

static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
					     bool enable)
{
	uint32_t data = 0;
	uint32_t default_data = 0;

	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
	data = REG_SET_FIELD(data, RLC_PG_CNTL,
			     CP_PG_DISABLE,
			     enable ? 0 : 1);
0 : 1); 2992 if(default_data != data) 2993 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2994 } 2995 2996 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2997 bool enable) 2998 { 2999 uint32_t data, default_data; 3000 3001 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3002 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3003 GFX_POWER_GATING_ENABLE, 3004 enable ? 1 : 0); 3005 if(default_data != data) 3006 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3007 } 3008 3009 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 3010 bool enable) 3011 { 3012 uint32_t data, default_data; 3013 3014 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3015 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3016 GFX_PIPELINE_PG_ENABLE, 3017 enable ? 1 : 0); 3018 if(default_data != data) 3019 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3020 3021 if (!enable) 3022 /* read any GFX register to wake up GFX */ 3023 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 3024 } 3025 3026 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 3027 bool enable) 3028 { 3029 uint32_t data, default_data; 3030 3031 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3032 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3033 STATIC_PER_CU_PG_ENABLE, 3034 enable ? 1 : 0); 3035 if(default_data != data) 3036 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3037 } 3038 3039 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 3040 bool enable) 3041 { 3042 uint32_t data, default_data; 3043 3044 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3045 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3046 DYN_PER_CU_PG_ENABLE, 3047 enable ? 1 : 0); 3048 if(default_data != data) 3049 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3050 } 3051 3052 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 3053 { 3054 gfx_v9_0_init_csb(adev); 3055 3056 /* 3057 * Rlc save restore list is workable since v2_1. 3058 * And it's needed by gfxoff feature. 
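	 * The list itself is programmed into the RLC SRM ARAM and GPM scratch RAM
	 * by gfx_v9_1_init_rlc_save_restore_list() below.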
3059 */ 3060 if (adev->gfx.rlc.is_rlc_v2_1) { 3061 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) || 3062 (adev->apu_flags & AMD_APU_IS_RAVEN2)) 3063 gfx_v9_1_init_rlc_save_restore_list(adev); 3064 gfx_v9_0_enable_save_restore_machine(adev); 3065 } 3066 3067 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 3068 AMD_PG_SUPPORT_GFX_SMG | 3069 AMD_PG_SUPPORT_GFX_DMG | 3070 AMD_PG_SUPPORT_CP | 3071 AMD_PG_SUPPORT_GDS | 3072 AMD_PG_SUPPORT_RLC_SMU_HS)) { 3073 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, 3074 adev->gfx.rlc.cp_table_gpu_addr >> 8); 3075 gfx_v9_0_init_gfx_power_gating(adev); 3076 } 3077 } 3078 3079 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 3080 { 3081 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 3082 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3083 gfx_v9_0_wait_for_rlc_serdes(adev); 3084 } 3085 3086 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 3087 { 3088 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3089 udelay(50); 3090 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 3091 udelay(50); 3092 } 3093 3094 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 3095 { 3096 #ifdef AMDGPU_RLC_DEBUG_RETRY 3097 u32 rlc_ucode_ver; 3098 #endif 3099 3100 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 3101 udelay(50); 3102 3103 /* carrizo do enable cp interrupt after cp inited */ 3104 if (!(adev->flags & AMD_IS_APU)) { 3105 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3106 udelay(50); 3107 } 3108 3109 #ifdef AMDGPU_RLC_DEBUG_RETRY 3110 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 3111 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 3112 if(rlc_ucode_ver == 0x108) { 3113 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 3114 rlc_ucode_ver, adev->gfx.rlc_fw_version); 3115 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 3116 * default is 0x9C4 to create a 100us interval */ 3117 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 3118 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 3119 * to disable the page fault retry interrupts, default is 3120 * 0x100 (256) */ 3121 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 3122 } 3123 #endif 3124 } 3125 3126 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 3127 { 3128 const struct rlc_firmware_header_v2_0 *hdr; 3129 const __le32 *fw_data; 3130 unsigned i, fw_size; 3131 3132 if (!adev->gfx.rlc_fw) 3133 return -EINVAL; 3134 3135 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 3136 amdgpu_ucode_print_rlc_hdr(&hdr->header); 3137 3138 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 3139 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 3140 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 3141 3142 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 3143 RLCG_UCODE_LOADING_START_ADDRESS); 3144 for (i = 0; i < fw_size; i++) 3145 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 3146 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 3147 3148 return 0; 3149 } 3150 3151 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 3152 { 3153 int r; 3154 3155 if (amdgpu_sriov_vf(adev)) { 3156 gfx_v9_0_init_csb(adev); 3157 return 0; 3158 } 3159 3160 adev->gfx.rlc.funcs->stop(adev); 3161 3162 /* disable CG */ 3163 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 3164 3165 gfx_v9_0_init_pg(adev); 3166 3167 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3168 /* legacy rlc firmware loading */ 3169 r = gfx_v9_0_rlc_load_microcode(adev); 
3170 if (r) 3171 return r; 3172 } 3173 3174 switch (adev->ip_versions[GC_HWIP][0]) { 3175 case IP_VERSION(9, 2, 2): 3176 case IP_VERSION(9, 1, 0): 3177 if (amdgpu_lbpw == 0) 3178 gfx_v9_0_enable_lbpw(adev, false); 3179 else 3180 gfx_v9_0_enable_lbpw(adev, true); 3181 break; 3182 case IP_VERSION(9, 4, 0): 3183 if (amdgpu_lbpw > 0) 3184 gfx_v9_0_enable_lbpw(adev, true); 3185 else 3186 gfx_v9_0_enable_lbpw(adev, false); 3187 break; 3188 default: 3189 break; 3190 } 3191 3192 adev->gfx.rlc.funcs->start(adev); 3193 3194 return 0; 3195 } 3196 3197 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3198 { 3199 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3200 3201 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3202 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3203 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 3204 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3205 udelay(50); 3206 } 3207 3208 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3209 { 3210 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3211 const struct gfx_firmware_header_v1_0 *ce_hdr; 3212 const struct gfx_firmware_header_v1_0 *me_hdr; 3213 const __le32 *fw_data; 3214 unsigned i, fw_size; 3215 3216 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3217 return -EINVAL; 3218 3219 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3220 adev->gfx.pfp_fw->data; 3221 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3222 adev->gfx.ce_fw->data; 3223 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3224 adev->gfx.me_fw->data; 3225 3226 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3227 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3228 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3229 3230 gfx_v9_0_cp_gfx_enable(adev, false); 3231 3232 /* PFP */ 3233 fw_data = (const __le32 *) 3234 (adev->gfx.pfp_fw->data + 3235 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3236 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3237 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3238 for (i = 0; i < fw_size; i++) 3239 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3240 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3241 3242 /* CE */ 3243 fw_data = (const __le32 *) 3244 (adev->gfx.ce_fw->data + 3245 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3246 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3247 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3248 for (i = 0; i < fw_size; i++) 3249 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3250 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3251 3252 /* ME */ 3253 fw_data = (const __le32 *) 3254 (adev->gfx.me_fw->data + 3255 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3256 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3257 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3258 for (i = 0; i < fw_size; i++) 3259 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3260 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3261 3262 return 0; 3263 } 3264 3265 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3266 { 3267 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3268 const struct cs_section_def *sect = NULL; 3269 const struct cs_extent_def *ext = NULL; 3270 int r, i, tmp; 3271 3272 /* init the CP */ 3273 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3274 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 3275 3276 
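	/* un-halt the PFP, CE and ME so they can consume the clear-state packets queued below */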
gfx_v9_0_cp_gfx_enable(adev, true); 3277 3278 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 3279 if (r) { 3280 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3281 return r; 3282 } 3283 3284 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3285 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3286 3287 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3288 amdgpu_ring_write(ring, 0x80000000); 3289 amdgpu_ring_write(ring, 0x80000000); 3290 3291 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 3292 for (ext = sect->section; ext->extent != NULL; ++ext) { 3293 if (sect->id == SECT_CONTEXT) { 3294 amdgpu_ring_write(ring, 3295 PACKET3(PACKET3_SET_CONTEXT_REG, 3296 ext->reg_count)); 3297 amdgpu_ring_write(ring, 3298 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 3299 for (i = 0; i < ext->reg_count; i++) 3300 amdgpu_ring_write(ring, ext->extent[i]); 3301 } 3302 } 3303 } 3304 3305 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3306 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3307 3308 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3309 amdgpu_ring_write(ring, 0); 3310 3311 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3312 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3313 amdgpu_ring_write(ring, 0x8000); 3314 amdgpu_ring_write(ring, 0x8000); 3315 3316 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 3317 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 3318 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 3319 amdgpu_ring_write(ring, tmp); 3320 amdgpu_ring_write(ring, 0); 3321 3322 amdgpu_ring_commit(ring); 3323 3324 return 0; 3325 } 3326 3327 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 3328 { 3329 struct amdgpu_ring *ring; 3330 u32 tmp; 3331 u32 rb_bufsz; 3332 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3333 3334 /* Set the write pointer delay */ 3335 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 3336 3337 /* set the RB to use vmid 0 */ 3338 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 3339 3340 /* Set ring buffer size */ 3341 ring = &adev->gfx.gfx_ring[0]; 3342 rb_bufsz = order_base_2(ring->ring_size / 8); 3343 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3344 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3345 #ifdef __BIG_ENDIAN 3346 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 3347 #endif 3348 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3349 3350 /* Initialize the ring buffer's write pointers */ 3351 ring->wptr = 0; 3352 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3353 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3354 3355 /* set the wb address wether it's enabled or not */ 3356 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3357 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3358 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3359 3360 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3361 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 3362 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 3363 3364 mdelay(1); 3365 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3366 3367 rb_addr = ring->gpu_addr >> 8; 3368 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 3369 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3370 3371 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3372 if 
(ring->use_doorbell) { 3373 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3374 DOORBELL_OFFSET, ring->doorbell_index); 3375 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3376 DOORBELL_EN, 1); 3377 } else { 3378 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3379 } 3380 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3381 3382 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3383 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3384 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3385 3386 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3387 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3388 3389 3390 /* start the ring */ 3391 gfx_v9_0_cp_gfx_start(adev); 3392 ring->sched.ready = true; 3393 3394 return 0; 3395 } 3396 3397 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3398 { 3399 if (enable) { 3400 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3401 } else { 3402 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3403 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3404 adev->gfx.kiq.ring.sched.ready = false; 3405 } 3406 udelay(50); 3407 } 3408 3409 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3410 { 3411 const struct gfx_firmware_header_v1_0 *mec_hdr; 3412 const __le32 *fw_data; 3413 unsigned i; 3414 u32 tmp; 3415 3416 if (!adev->gfx.mec_fw) 3417 return -EINVAL; 3418 3419 gfx_v9_0_cp_compute_enable(adev, false); 3420 3421 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3422 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3423 3424 fw_data = (const __le32 *) 3425 (adev->gfx.mec_fw->data + 3426 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3427 tmp = 0; 3428 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3429 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3430 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3431 3432 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3433 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3434 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3435 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3436 3437 /* MEC1 */ 3438 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3439 mec_hdr->jt_offset); 3440 for (i = 0; i < mec_hdr->jt_size; i++) 3441 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3442 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3443 3444 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3445 adev->gfx.mec_fw_version); 3446 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. 
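	 * Only the MEC1 jump table is programmed by this legacy load path.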
*/ 3447 3448 return 0; 3449 } 3450 3451 /* KIQ functions */ 3452 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3453 { 3454 uint32_t tmp; 3455 struct amdgpu_device *adev = ring->adev; 3456 3457 /* tell RLC which is KIQ queue */ 3458 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3459 tmp &= 0xffffff00; 3460 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3461 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3462 tmp |= 0x80; 3463 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3464 } 3465 3466 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) 3467 { 3468 struct amdgpu_device *adev = ring->adev; 3469 3470 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 3471 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { 3472 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; 3473 mqd->cp_hqd_queue_priority = 3474 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; 3475 } 3476 } 3477 } 3478 3479 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3480 { 3481 struct amdgpu_device *adev = ring->adev; 3482 struct v9_mqd *mqd = ring->mqd_ptr; 3483 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3484 uint32_t tmp; 3485 3486 mqd->header = 0xC0310800; 3487 mqd->compute_pipelinestat_enable = 0x00000001; 3488 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3489 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3490 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3491 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3492 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3493 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3494 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3495 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3496 mqd->compute_misc_reserved = 0x00000003; 3497 3498 mqd->dynamic_cu_mask_addr_lo = 3499 lower_32_bits(ring->mqd_gpu_addr 3500 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3501 mqd->dynamic_cu_mask_addr_hi = 3502 upper_32_bits(ring->mqd_gpu_addr 3503 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3504 3505 eop_base_addr = ring->eop_gpu_addr >> 8; 3506 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3507 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3508 3509 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3510 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3511 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3512 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3513 3514 mqd->cp_hqd_eop_control = tmp; 3515 3516 /* enable doorbell? 
*/ 3517 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3518 3519 if (ring->use_doorbell) { 3520 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3521 DOORBELL_OFFSET, ring->doorbell_index); 3522 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3523 DOORBELL_EN, 1); 3524 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3525 DOORBELL_SOURCE, 0); 3526 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3527 DOORBELL_HIT, 0); 3528 } else { 3529 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3530 DOORBELL_EN, 0); 3531 } 3532 3533 mqd->cp_hqd_pq_doorbell_control = tmp; 3534 3535 /* disable the queue if it's active */ 3536 ring->wptr = 0; 3537 mqd->cp_hqd_dequeue_request = 0; 3538 mqd->cp_hqd_pq_rptr = 0; 3539 mqd->cp_hqd_pq_wptr_lo = 0; 3540 mqd->cp_hqd_pq_wptr_hi = 0; 3541 3542 /* set the pointer to the MQD */ 3543 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3544 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3545 3546 /* set MQD vmid to 0 */ 3547 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3548 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3549 mqd->cp_mqd_control = tmp; 3550 3551 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3552 hqd_gpu_addr = ring->gpu_addr >> 8; 3553 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3554 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3555 3556 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3557 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3558 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3559 (order_base_2(ring->ring_size / 4) - 1)); 3560 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3561 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3562 #ifdef __BIG_ENDIAN 3563 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3564 #endif 3565 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3566 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3567 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3568 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3569 mqd->cp_hqd_pq_control = tmp; 3570 3571 /* set the wb address whether it's enabled or not */ 3572 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3573 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3574 mqd->cp_hqd_pq_rptr_report_addr_hi = 3575 upper_32_bits(wb_gpu_addr) & 0xffff; 3576 3577 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3578 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3579 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3580 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3581 3582 tmp = 0; 3583 /* enable the doorbell if requested */ 3584 if (ring->use_doorbell) { 3585 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3586 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3587 DOORBELL_OFFSET, ring->doorbell_index); 3588 3589 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3590 DOORBELL_EN, 1); 3591 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3592 DOORBELL_SOURCE, 0); 3593 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3594 DOORBELL_HIT, 0); 3595 } 3596 3597 mqd->cp_hqd_pq_doorbell_control = tmp; 3598 3599 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3600 ring->wptr = 0; 3601 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3602 3603 /* set the vmid for the queue */ 3604 mqd->cp_hqd_vmid = 0; 3605 3606 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3607 
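	/* keep the register defaults and only adjust the HQD preload size below */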
tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3608 mqd->cp_hqd_persistent_state = tmp; 3609 3610 /* set MIN_IB_AVAIL_SIZE */ 3611 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3612 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3613 mqd->cp_hqd_ib_control = tmp; 3614 3615 /* set static priority for a queue/ring */ 3616 gfx_v9_0_mqd_set_priority(ring, mqd); 3617 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM); 3618 3619 /* map_queues packet doesn't need activate the queue, 3620 * so only kiq need set this field. 3621 */ 3622 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 3623 mqd->cp_hqd_active = 1; 3624 3625 return 0; 3626 } 3627 3628 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3629 { 3630 struct amdgpu_device *adev = ring->adev; 3631 struct v9_mqd *mqd = ring->mqd_ptr; 3632 int j; 3633 3634 /* disable wptr polling */ 3635 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3636 3637 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3638 mqd->cp_hqd_eop_base_addr_lo); 3639 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3640 mqd->cp_hqd_eop_base_addr_hi); 3641 3642 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3643 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3644 mqd->cp_hqd_eop_control); 3645 3646 /* enable doorbell? */ 3647 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3648 mqd->cp_hqd_pq_doorbell_control); 3649 3650 /* disable the queue if it's active */ 3651 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3652 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3653 for (j = 0; j < adev->usec_timeout; j++) { 3654 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3655 break; 3656 udelay(1); 3657 } 3658 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3659 mqd->cp_hqd_dequeue_request); 3660 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3661 mqd->cp_hqd_pq_rptr); 3662 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3663 mqd->cp_hqd_pq_wptr_lo); 3664 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3665 mqd->cp_hqd_pq_wptr_hi); 3666 } 3667 3668 /* set the pointer to the MQD */ 3669 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3670 mqd->cp_mqd_base_addr_lo); 3671 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3672 mqd->cp_mqd_base_addr_hi); 3673 3674 /* set MQD vmid to 0 */ 3675 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3676 mqd->cp_mqd_control); 3677 3678 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3679 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3680 mqd->cp_hqd_pq_base_lo); 3681 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3682 mqd->cp_hqd_pq_base_hi); 3683 3684 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3685 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3686 mqd->cp_hqd_pq_control); 3687 3688 /* set the wb address whether it's enabled or not */ 3689 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3690 mqd->cp_hqd_pq_rptr_report_addr_lo); 3691 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3692 mqd->cp_hqd_pq_rptr_report_addr_hi); 3693 3694 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3695 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3696 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3697 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3698 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3699 3700 /* enable the doorbell if requested */ 3701 if (ring->use_doorbell) { 3702 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3703 (adev->doorbell_index.kiq * 2) << 2); 3704 /* If GC has entered CGPG, ringing doorbell > first page 3705 * 
doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
		 * work around this issue. This change has to be kept in step
		 * with the corresponding firmware update.
		 */
		if (check_if_enlarge_doorbell_range(adev))
			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
					(adev->doorbell.size - 4));
		else
			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
					(adev->doorbell_index.userqueue_end * 2) << 2);
	}

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
	       mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
	       mqd->cp_hqd_pq_wptr_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
	       mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
	       mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
	       mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}

static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int j;

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {

		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);

		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}

		if (j == adev->usec_timeout) {
			DRM_DEBUG("KIQ dequeue request failed.\n");

			/* Manual disable if dequeue request times out */
			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
		}

		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
		      0);
	}

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);

	return 0;
}

static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
	struct v9_mqd *tmp_mqd;

	gfx_v9_0_kiq_setting(ring);

	/* The GPU could be in a bad state during probe: the driver triggers
	 * the reset after loading the SMU, and in that case the MQD was never
	 * initialized, so the driver needs to re-initialize it here;
3793 * check mqd->cp_hqd_pq_control since this value should not be 0 3794 */ 3795 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; 3796 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){ 3797 /* for GPU_RESET case , reset MQD to a clean status */ 3798 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3799 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3800 3801 /* reset ring buffer */ 3802 ring->wptr = 0; 3803 amdgpu_ring_clear_ring(ring); 3804 3805 mutex_lock(&adev->srbm_mutex); 3806 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3807 gfx_v9_0_kiq_init_register(ring); 3808 soc15_grbm_select(adev, 0, 0, 0, 0); 3809 mutex_unlock(&adev->srbm_mutex); 3810 } else { 3811 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3812 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3813 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3814 mutex_lock(&adev->srbm_mutex); 3815 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3816 gfx_v9_0_mqd_init(ring); 3817 gfx_v9_0_kiq_init_register(ring); 3818 soc15_grbm_select(adev, 0, 0, 0, 0); 3819 mutex_unlock(&adev->srbm_mutex); 3820 3821 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3822 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3823 } 3824 3825 return 0; 3826 } 3827 3828 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) 3829 { 3830 struct amdgpu_device *adev = ring->adev; 3831 struct v9_mqd *mqd = ring->mqd_ptr; 3832 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3833 struct v9_mqd *tmp_mqd; 3834 3835 /* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control 3836 * is not be initialized before 3837 */ 3838 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; 3839 3840 if (!tmp_mqd->cp_hqd_pq_control || 3841 (!amdgpu_in_reset(adev) && !adev->in_suspend)) { 3842 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3843 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3844 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3845 mutex_lock(&adev->srbm_mutex); 3846 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3847 gfx_v9_0_mqd_init(ring); 3848 soc15_grbm_select(adev, 0, 0, 0, 0); 3849 mutex_unlock(&adev->srbm_mutex); 3850 3851 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3852 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3853 } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 3854 /* reset MQD to a clean status */ 3855 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3856 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3857 3858 /* reset ring buffer */ 3859 ring->wptr = 0; 3860 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0); 3861 amdgpu_ring_clear_ring(ring); 3862 } else { 3863 amdgpu_ring_clear_ring(ring); 3864 } 3865 3866 return 0; 3867 } 3868 3869 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3870 { 3871 struct amdgpu_ring *ring; 3872 int r; 3873 3874 ring = &adev->gfx.kiq.ring; 3875 3876 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3877 if (unlikely(r != 0)) 3878 return r; 3879 3880 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3881 if (unlikely(r != 0)) 3882 return r; 3883 3884 gfx_v9_0_kiq_init_queue(ring); 3885 amdgpu_bo_kunmap(ring->mqd_obj); 3886 ring->mqd_ptr = NULL; 3887 amdgpu_bo_unreserve(ring->mqd_obj); 3888 ring->sched.ready = true; 3889 return 0; 3890 } 3891 3892 static int gfx_v9_0_kcq_resume(struct 
amdgpu_device *adev) 3893 { 3894 struct amdgpu_ring *ring = NULL; 3895 int r = 0, i; 3896 3897 gfx_v9_0_cp_compute_enable(adev, true); 3898 3899 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3900 ring = &adev->gfx.compute_ring[i]; 3901 3902 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3903 if (unlikely(r != 0)) 3904 goto done; 3905 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3906 if (!r) { 3907 r = gfx_v9_0_kcq_init_queue(ring); 3908 amdgpu_bo_kunmap(ring->mqd_obj); 3909 ring->mqd_ptr = NULL; 3910 } 3911 amdgpu_bo_unreserve(ring->mqd_obj); 3912 if (r) 3913 goto done; 3914 } 3915 3916 r = amdgpu_gfx_enable_kcq(adev); 3917 done: 3918 return r; 3919 } 3920 3921 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3922 { 3923 int r, i; 3924 struct amdgpu_ring *ring; 3925 3926 if (!(adev->flags & AMD_IS_APU)) 3927 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3928 3929 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3930 if (adev->gfx.num_gfx_rings) { 3931 /* legacy firmware loading */ 3932 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3933 if (r) 3934 return r; 3935 } 3936 3937 r = gfx_v9_0_cp_compute_load_microcode(adev); 3938 if (r) 3939 return r; 3940 } 3941 3942 r = gfx_v9_0_kiq_resume(adev); 3943 if (r) 3944 return r; 3945 3946 if (adev->gfx.num_gfx_rings) { 3947 r = gfx_v9_0_cp_gfx_resume(adev); 3948 if (r) 3949 return r; 3950 } 3951 3952 r = gfx_v9_0_kcq_resume(adev); 3953 if (r) 3954 return r; 3955 3956 if (adev->gfx.num_gfx_rings) { 3957 ring = &adev->gfx.gfx_ring[0]; 3958 r = amdgpu_ring_test_helper(ring); 3959 if (r) 3960 return r; 3961 } 3962 3963 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3964 ring = &adev->gfx.compute_ring[i]; 3965 amdgpu_ring_test_helper(ring); 3966 } 3967 3968 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3969 3970 return 0; 3971 } 3972 3973 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) 3974 { 3975 u32 tmp; 3976 3977 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) && 3978 adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)) 3979 return; 3980 3981 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); 3982 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, 3983 adev->df.hash_status.hash_64k); 3984 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, 3985 adev->df.hash_status.hash_2m); 3986 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, 3987 adev->df.hash_status.hash_1g); 3988 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); 3989 } 3990 3991 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3992 { 3993 if (adev->gfx.num_gfx_rings) 3994 gfx_v9_0_cp_gfx_enable(adev, enable); 3995 gfx_v9_0_cp_compute_enable(adev, enable); 3996 } 3997 3998 static int gfx_v9_0_hw_init(void *handle) 3999 { 4000 int r; 4001 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4002 4003 if (!amdgpu_sriov_vf(adev)) 4004 gfx_v9_0_init_golden_registers(adev); 4005 4006 gfx_v9_0_constants_init(adev); 4007 4008 gfx_v9_0_init_tcp_config(adev); 4009 4010 r = adev->gfx.rlc.funcs->resume(adev); 4011 if (r) 4012 return r; 4013 4014 r = gfx_v9_0_cp_resume(adev); 4015 if (r) 4016 return r; 4017 4018 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) 4019 gfx_v9_4_2_set_power_brake_sequence(adev); 4020 4021 return r; 4022 } 4023 4024 static int gfx_v9_0_hw_fini(void *handle) 4025 { 4026 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4027 4028 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 4029 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4030 amdgpu_irq_put(adev, 
&adev->gfx.priv_inst_irq, 0);

	/* If a RAS fatal-error interrupt was triggered, DF freeze and the KCQ
	 * disable would fail, so skip disabling the KCQ in that case.
	 */
	if (!amdgpu_ras_intr_triggered())
		/* disable KCQ so the CPC stops touching memory that is no longer valid */
		amdgpu_gfx_disable_kcq(adev);

	if (amdgpu_sriov_vf(adev)) {
		gfx_v9_0_cp_gfx_enable(adev, false);
		/* Wptr polling must be disabled for SRIOV once the hw is done;
		 * otherwise the CPC engine may keep fetching a WB address that
		 * is no longer valid after the sw side has finished, which
		 * triggers DMAR read errors on the hypervisor side.
		 */
		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		return 0;
	}

	/* Use the deinitialize sequence from CAIL when unbinding the device
	 * from the driver; otherwise the KIQ hangs when the device is bound back.
	 */
	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
				adev->gfx.kiq.ring.pipe,
				adev->gfx.kiq.ring.queue, 0);
		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
		soc15_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	}

	gfx_v9_0_cp_enable(adev, false);

	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
	    (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
		dev_dbg(adev->dev, "Skipping RLC halt\n");
		return 0;
	}

	adev->gfx.rlc.funcs->stop(adev);
	return 0;
}

static int gfx_v9_0_suspend(void *handle)
{
	return gfx_v9_0_hw_fini(handle);
}

static int gfx_v9_0_resume(void *handle)
{
	return gfx_v9_0_hw_init(handle);
}

static bool gfx_v9_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
				GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

static int gfx_v9_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v9_0_is_idle(handle))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

static int gfx_v9_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS */
	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

4140 if (grbm_soft_reset) { 4141 /* stop the rlc */ 4142 adev->gfx.rlc.funcs->stop(adev); 4143 4144 if (adev->gfx.num_gfx_rings) 4145 /* Disable GFX parsing/prefetching */ 4146 gfx_v9_0_cp_gfx_enable(adev, false); 4147 4148 /* Disable MEC parsing/prefetching */ 4149 gfx_v9_0_cp_compute_enable(adev, false); 4150 4151 if (grbm_soft_reset) { 4152 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4153 tmp |= grbm_soft_reset; 4154 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4155 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4156 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4157 4158 udelay(50); 4159 4160 tmp &= ~grbm_soft_reset; 4161 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4162 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4163 } 4164 4165 /* Wait a little for things to settle down */ 4166 udelay(50); 4167 } 4168 return 0; 4169 } 4170 4171 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) 4172 { 4173 signed long r, cnt = 0; 4174 unsigned long flags; 4175 uint32_t seq, reg_val_offs = 0; 4176 uint64_t value = 0; 4177 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 4178 struct amdgpu_ring *ring = &kiq->ring; 4179 4180 BUG_ON(!ring->funcs->emit_rreg); 4181 4182 spin_lock_irqsave(&kiq->ring_lock, flags); 4183 if (amdgpu_device_wb_get(adev, ®_val_offs)) { 4184 pr_err("critical bug! too many kiq readers\n"); 4185 goto failed_unlock; 4186 } 4187 amdgpu_ring_alloc(ring, 32); 4188 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 4189 amdgpu_ring_write(ring, 9 | /* src: register*/ 4190 (5 << 8) | /* dst: memory */ 4191 (1 << 16) | /* count sel */ 4192 (1 << 20)); /* write confirm */ 4193 amdgpu_ring_write(ring, 0); 4194 amdgpu_ring_write(ring, 0); 4195 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 4196 reg_val_offs * 4)); 4197 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 4198 reg_val_offs * 4)); 4199 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); 4200 if (r) 4201 goto failed_undo; 4202 4203 amdgpu_ring_commit(ring); 4204 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4205 4206 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4207 4208 /* don't wait anymore for gpu reset case because this way may 4209 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg 4210 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will 4211 * never return if we keep waiting in virt_kiq_rreg, which cause 4212 * gpu_recover() hang there. 
4213 * 4214 * also don't wait anymore for IRQ context 4215 * */ 4216 if (r < 1 && (amdgpu_in_reset(adev))) 4217 goto failed_kiq_read; 4218 4219 might_sleep(); 4220 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 4221 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 4222 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4223 } 4224 4225 if (cnt > MAX_KIQ_REG_TRY) 4226 goto failed_kiq_read; 4227 4228 mb(); 4229 value = (uint64_t)adev->wb.wb[reg_val_offs] | 4230 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL; 4231 amdgpu_device_wb_free(adev, reg_val_offs); 4232 return value; 4233 4234 failed_undo: 4235 amdgpu_ring_undo(ring); 4236 failed_unlock: 4237 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4238 failed_kiq_read: 4239 if (reg_val_offs) 4240 amdgpu_device_wb_free(adev, reg_val_offs); 4241 pr_err("failed to read gpu clock\n"); 4242 return ~0; 4243 } 4244 4245 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4246 { 4247 uint64_t clock, clock_lo, clock_hi, hi_check; 4248 4249 switch (adev->ip_versions[GC_HWIP][0]) { 4250 case IP_VERSION(9, 3, 0): 4251 preempt_disable(); 4252 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4253 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4254 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4255 /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over 4256 * roughly every 42 seconds. 4257 */ 4258 if (hi_check != clock_hi) { 4259 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4260 clock_hi = hi_check; 4261 } 4262 preempt_enable(); 4263 clock = clock_lo | (clock_hi << 32ULL); 4264 break; 4265 default: 4266 amdgpu_gfx_off_ctrl(adev, false); 4267 mutex_lock(&adev->gfx.gpu_clock_mutex); 4268 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) { 4269 clock = gfx_v9_0_kiq_read_clock(adev); 4270 } else { 4271 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4272 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4273 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4274 } 4275 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4276 amdgpu_gfx_off_ctrl(adev, true); 4277 break; 4278 } 4279 return clock; 4280 } 4281 4282 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4283 uint32_t vmid, 4284 uint32_t gds_base, uint32_t gds_size, 4285 uint32_t gws_base, uint32_t gws_size, 4286 uint32_t oa_base, uint32_t oa_size) 4287 { 4288 struct amdgpu_device *adev = ring->adev; 4289 4290 /* GDS Base */ 4291 gfx_v9_0_write_data_to_reg(ring, 0, false, 4292 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4293 gds_base); 4294 4295 /* GDS Size */ 4296 gfx_v9_0_write_data_to_reg(ring, 0, false, 4297 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4298 gds_size); 4299 4300 /* GWS */ 4301 gfx_v9_0_write_data_to_reg(ring, 0, false, 4302 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4303 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4304 4305 /* OA */ 4306 gfx_v9_0_write_data_to_reg(ring, 0, false, 4307 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4308 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4309 } 4310 4311 static const u32 vgpr_init_compute_shader[] = 4312 { 4313 0xb07c0000, 0xbe8000ff, 4314 0x000000f8, 0xbf110800, 4315 0x7e000280, 0x7e020280, 4316 0x7e040280, 0x7e060280, 4317 0x7e080280, 0x7e0a0280, 4318 0x7e0c0280, 0x7e0e0280, 4319 0x80808800, 0xbe803200, 4320 0xbf84fff5, 0xbf9c0000, 4321 0xd28c0001, 0x0001007f, 4322 
0xd28d0001, 0x0002027e, 4323 0x10020288, 0xb8810904, 4324 0xb7814000, 0xd1196a01, 4325 0x00000301, 0xbe800087, 4326 0xbefc00c1, 0xd89c4000, 4327 0x00020201, 0xd89cc080, 4328 0x00040401, 0x320202ff, 4329 0x00000800, 0x80808100, 4330 0xbf84fff8, 0x7e020280, 4331 0xbf810000, 0x00000000, 4332 }; 4333 4334 static const u32 sgpr_init_compute_shader[] = 4335 { 4336 0xb07c0000, 0xbe8000ff, 4337 0x0000005f, 0xbee50080, 4338 0xbe812c65, 0xbe822c65, 4339 0xbe832c65, 0xbe842c65, 4340 0xbe852c65, 0xb77c0005, 4341 0x80808500, 0xbf84fff8, 4342 0xbe800080, 0xbf810000, 4343 }; 4344 4345 static const u32 vgpr_init_compute_shader_arcturus[] = { 4346 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 4347 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 4348 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 4349 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 4350 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 4351 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 4352 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 4353 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 4354 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 4355 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 4356 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 4357 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 4358 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 4359 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 4360 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 4361 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 4362 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 4363 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 4364 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 4365 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 4366 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 4367 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 4368 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 4369 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 4370 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 4371 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 4372 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 4373 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 4374 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 4375 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 4376 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 4377 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 4378 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 4379 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 4380 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 4381 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 4382 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 4383 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 
0x18000080, 4384 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 4385 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 4386 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 4387 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 4388 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 4389 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 4390 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 4391 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 4392 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 4393 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 4394 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 4395 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 4396 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 4397 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 4398 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 4399 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 4400 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 4401 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 4402 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 4403 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 4404 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 4405 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 4406 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 4407 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 4408 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 4409 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 4410 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 4411 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 4412 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 4413 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 4414 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 4415 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 4416 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 4417 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 4418 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 4419 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 4420 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 4421 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 4422 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 4423 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 4424 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 4425 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 4426 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 4427 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 4428 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 4429 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 
4430 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 4431 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 4432 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 4433 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 4434 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904, 4435 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 4436 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 4437 0xbf84fff8, 0xbf810000, 4438 }; 4439 4440 /* When below register arrays changed, please update gpr_reg_size, 4441 and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, 4442 to cover all gfx9 ASICs */ 4443 static const struct soc15_reg_entry vgpr_init_regs[] = { 4444 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4445 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4446 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4447 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4448 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, 4449 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4450 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4451 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4452 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4453 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4454 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4455 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4456 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4457 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4458 }; 4459 4460 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = { 4461 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4462 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4463 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4464 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4465 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf }, 4466 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4467 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4468 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4469 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4470 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4471 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4472 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4473 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4474 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4475 }; 4476 4477 static const struct soc15_reg_entry sgpr1_init_regs[] = { 4478 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4479 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4480 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4481 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4482 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4483 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4484 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, 4485 { SOC15_REG_ENTRY(GC, 0, 
mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, 4486 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, 4487 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, 4488 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, 4489 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, 4490 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, 4491 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, 4492 }; 4493 4494 static const struct soc15_reg_entry sgpr2_init_regs[] = { 4495 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4496 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4497 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4498 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4499 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4500 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4501 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, 4502 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, 4503 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, 4504 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, 4505 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, 4506 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, 4507 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, 4508 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, 4509 }; 4510 4511 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { 4512 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4513 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4514 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4515 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4516 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4517 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4518 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4519 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4520 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4521 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4522 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4523 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4524 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4525 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4526 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4527 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4528 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4529 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4530 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4531 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4532 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, 4533 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4534 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4535 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4536 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4537 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4538 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4539 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4540 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4541 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4542 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 
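	/* remaining EDC counters: TCA and SQC_EDC_CNT3 */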
4543 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4544 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4545 }; 4546 4547 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4548 { 4549 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4550 int i, r; 4551 4552 /* only support when RAS is enabled */ 4553 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4554 return 0; 4555 4556 r = amdgpu_ring_alloc(ring, 7); 4557 if (r) { 4558 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4559 ring->name, r); 4560 return r; 4561 } 4562 4563 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4564 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4565 4566 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4567 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4568 PACKET3_DMA_DATA_DST_SEL(1) | 4569 PACKET3_DMA_DATA_SRC_SEL(2) | 4570 PACKET3_DMA_DATA_ENGINE(0))); 4571 amdgpu_ring_write(ring, 0); 4572 amdgpu_ring_write(ring, 0); 4573 amdgpu_ring_write(ring, 0); 4574 amdgpu_ring_write(ring, 0); 4575 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4576 adev->gds.gds_size); 4577 4578 amdgpu_ring_commit(ring); 4579 4580 for (i = 0; i < adev->usec_timeout; i++) { 4581 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4582 break; 4583 udelay(1); 4584 } 4585 4586 if (i >= adev->usec_timeout) 4587 r = -ETIMEDOUT; 4588 4589 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4590 4591 return r; 4592 } 4593 4594 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4595 { 4596 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4597 struct amdgpu_ib ib; 4598 struct dma_fence *f = NULL; 4599 int r, i; 4600 unsigned total_size, vgpr_offset, sgpr_offset; 4601 u64 gpu_addr; 4602 4603 int compute_dim_x = adev->gfx.config.max_shader_engines * 4604 adev->gfx.config.max_cu_per_sh * 4605 adev->gfx.config.max_sh_per_se; 4606 int sgpr_work_group_size = 5; 4607 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6; 4608 int vgpr_init_shader_size; 4609 const u32 *vgpr_init_shader_ptr; 4610 const struct soc15_reg_entry *vgpr_init_regs_ptr; 4611 4612 /* only support when RAS is enabled */ 4613 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4614 return 0; 4615 4616 /* bail if the compute ring is not ready */ 4617 if (!ring->sched.ready) 4618 return 0; 4619 4620 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) { 4621 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; 4622 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); 4623 vgpr_init_regs_ptr = vgpr_init_regs_arcturus; 4624 } else { 4625 vgpr_init_shader_ptr = vgpr_init_compute_shader; 4626 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader); 4627 vgpr_init_regs_ptr = vgpr_init_regs; 4628 } 4629 4630 total_size = 4631 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ 4632 total_size += 4633 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ 4634 total_size += 4635 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ 4636 total_size = ALIGN(total_size, 256); 4637 vgpr_offset = total_size; 4638 total_size += ALIGN(vgpr_init_shader_size, 256); 4639 sgpr_offset = total_size; 4640 total_size += sizeof(sgpr_init_compute_shader); 4641 4642 /* allocate an indirect buffer to put the commands in */ 4643 memset(&ib, 0, sizeof(ib)); 4644 r = amdgpu_ib_get(adev, NULL, total_size, 4645 AMDGPU_IB_POOL_DIRECT, &ib); 4646 if (r) { 4647 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4648 return r; 4649 } 4650 4651 /* load the compute shaders 
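 * into the IB: the command stream occupies the start of the buffer
 * (three dispatch sequences of (gpr_reg_size * 3 + 4 + 5 + 2) dwords
 * each, i.e. 3 dwords per SET_SH_REG write, 4 for COMPUTE_PGM_LO/HI,
 * 5 for DISPATCH_DIRECT and 2 for the CS partial flush EVENT_WRITE),
 * while the VGPR-init shader is copied to vgpr_offset and the
 * SGPR-init shader to sgpr_offset, matching the total_size layout
 * computed above.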
*/ 4652 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++) 4653 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i]; 4654 4655 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4656 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4657 4658 /* init the ib length to 0 */ 4659 ib.length_dw = 0; 4660 4661 /* VGPR */ 4662 /* write the register state for the compute dispatch */ 4663 for (i = 0; i < gpr_reg_size; i++) { 4664 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4665 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i]) 4666 - PACKET3_SET_SH_REG_START; 4667 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value; 4668 } 4669 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4670 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4671 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4672 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4673 - PACKET3_SET_SH_REG_START; 4674 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4675 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4676 4677 /* write dispatch packet */ 4678 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4679 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */ 4680 ib.ptr[ib.length_dw++] = 1; /* y */ 4681 ib.ptr[ib.length_dw++] = 1; /* z */ 4682 ib.ptr[ib.length_dw++] = 4683 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4684 4685 /* write CS partial flush packet */ 4686 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4687 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4688 4689 /* SGPR1 */ 4690 /* write the register state for the compute dispatch */ 4691 for (i = 0; i < gpr_reg_size; i++) { 4692 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4693 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) 4694 - PACKET3_SET_SH_REG_START; 4695 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; 4696 } 4697 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4698 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4699 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4700 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4701 - PACKET3_SET_SH_REG_START; 4702 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4703 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4704 4705 /* write dispatch packet */ 4706 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4707 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4708 ib.ptr[ib.length_dw++] = 1; /* y */ 4709 ib.ptr[ib.length_dw++] = 1; /* z */ 4710 ib.ptr[ib.length_dw++] = 4711 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4712 4713 /* write CS partial flush packet */ 4714 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4715 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4716 4717 /* SGPR2 */ 4718 /* write the register state for the compute dispatch */ 4719 for (i = 0; i < gpr_reg_size; i++) { 4720 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4721 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) 4722 - PACKET3_SET_SH_REG_START; 4723 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; 4724 } 4725 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4726 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4727 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4728 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 
4729 - PACKET3_SET_SH_REG_START; 4730 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4731 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4732 4733 /* write dispatch packet */ 4734 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4735 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4736 ib.ptr[ib.length_dw++] = 1; /* y */ 4737 ib.ptr[ib.length_dw++] = 1; /* z */ 4738 ib.ptr[ib.length_dw++] = 4739 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4740 4741 /* write CS partial flush packet */ 4742 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4743 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4744 4745 /* schedule the ib on the ring */ 4746 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 4747 if (r) { 4748 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 4749 goto fail; 4750 } 4751 4752 /* wait for the GPU to finish processing the IB */ 4753 r = dma_fence_wait(f, false); 4754 if (r) { 4755 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 4756 goto fail; 4757 } 4758 4759 fail: 4760 amdgpu_ib_free(adev, &ib, NULL); 4761 dma_fence_put(f); 4762 4763 return r; 4764 } 4765 4766 static int gfx_v9_0_early_init(void *handle) 4767 { 4768 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4769 4770 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || 4771 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) 4772 adev->gfx.num_gfx_rings = 0; 4773 else 4774 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 4775 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 4776 AMDGPU_MAX_COMPUTE_RINGS); 4777 gfx_v9_0_set_kiq_pm4_funcs(adev); 4778 gfx_v9_0_set_ring_funcs(adev); 4779 gfx_v9_0_set_irq_funcs(adev); 4780 gfx_v9_0_set_gds_init(adev); 4781 gfx_v9_0_set_rlc_funcs(adev); 4782 4783 return 0; 4784 } 4785 4786 static int gfx_v9_0_ecc_late_init(void *handle) 4787 { 4788 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4789 int r; 4790 4791 /* 4792 * Temp workaround to fix the issue that CP firmware fails to 4793 * update the read pointer when CPDMA writes a clearing operation 4794 * to GDS during the suspend/resume sequence on several cards. So just 4795 * limit this operation to the cold boot sequence.
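 * (the !adev->in_suspend check below is what enforces this; the
 * workaround itself, gfx_v9_0_do_edc_gds_workarounds(), DMA-fills the
 * whole VMID0 GDS aperture with a CP DMA_DATA packet).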
4796 */ 4797 if ((!adev->in_suspend) && 4798 (adev->gds.gds_size)) { 4799 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4800 if (r) 4801 return r; 4802 } 4803 4804 /* requires IBs so do in late init after IB pool is initialized */ 4805 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) 4806 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev); 4807 else 4808 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4809 4810 if (r) 4811 return r; 4812 4813 if (adev->gfx.ras_funcs && 4814 adev->gfx.ras_funcs->ras_late_init) { 4815 r = adev->gfx.ras_funcs->ras_late_init(adev); 4816 if (r) 4817 return r; 4818 } 4819 4820 if (adev->gfx.ras_funcs && 4821 adev->gfx.ras_funcs->enable_watchdog_timer) 4822 adev->gfx.ras_funcs->enable_watchdog_timer(adev); 4823 4824 return 0; 4825 } 4826 4827 static int gfx_v9_0_late_init(void *handle) 4828 { 4829 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4830 int r; 4831 4832 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4833 if (r) 4834 return r; 4835 4836 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4837 if (r) 4838 return r; 4839 4840 r = gfx_v9_0_ecc_late_init(handle); 4841 if (r) 4842 return r; 4843 4844 return 0; 4845 } 4846 4847 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4848 { 4849 uint32_t rlc_setting; 4850 4851 /* if RLC is not enabled, do nothing */ 4852 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4853 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4854 return false; 4855 4856 return true; 4857 } 4858 4859 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 4860 { 4861 uint32_t data; 4862 unsigned i; 4863 4864 data = RLC_SAFE_MODE__CMD_MASK; 4865 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4866 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4867 4868 /* wait for RLC_SAFE_MODE */ 4869 for (i = 0; i < adev->usec_timeout; i++) { 4870 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4871 break; 4872 udelay(1); 4873 } 4874 } 4875 4876 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 4877 { 4878 uint32_t data; 4879 4880 data = RLC_SAFE_MODE__CMD_MASK; 4881 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4882 } 4883 4884 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4885 bool enable) 4886 { 4887 amdgpu_gfx_rlc_enter_safe_mode(adev); 4888 4889 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4890 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4891 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4892 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4893 } else { 4894 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4895 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4896 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4897 } 4898 4899 amdgpu_gfx_rlc_exit_safe_mode(adev); 4900 } 4901 4902 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4903 bool enable) 4904 { 4905 /* TODO: double check if we need to perform under safe mode */ 4906 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4907 4908 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4909 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4910 else 4911 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4912 4913 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4914 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4915 else 4916 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4917 4918 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4919 } 4920 4921 static void 
gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4922 bool enable) 4923 { 4924 uint32_t data, def; 4925 4926 amdgpu_gfx_rlc_enter_safe_mode(adev); 4927 4928 /* It is disabled by HW by default */ 4929 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4930 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4931 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4932 4933 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1)) 4934 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4935 4936 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4937 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4938 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4939 4940 /* only for Vega10 & Raven1 */ 4941 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4942 4943 if (def != data) 4944 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4945 4946 /* MGLS is a global flag to control all MGLS in GFX */ 4947 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4948 /* 2 - RLC memory Light sleep */ 4949 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4950 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4951 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4952 if (def != data) 4953 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4954 } 4955 /* 3 - CP memory Light sleep */ 4956 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4957 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4958 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4959 if (def != data) 4960 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4961 } 4962 } 4963 } else { 4964 /* 1 - MGCG_OVERRIDE */ 4965 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4966 4967 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1)) 4968 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4969 4970 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4971 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4972 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4973 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4974 4975 if (def != data) 4976 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4977 4978 /* 2 - disable MGLS in RLC */ 4979 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4980 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4981 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4982 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4983 } 4984 4985 /* 3 - disable MGLS in CP */ 4986 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4987 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4988 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4989 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4990 } 4991 } 4992 4993 amdgpu_gfx_rlc_exit_safe_mode(adev); 4994 } 4995 4996 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4997 bool enable) 4998 { 4999 uint32_t data, def; 5000 5001 if (!adev->gfx.num_gfx_rings) 5002 return; 5003 5004 amdgpu_gfx_rlc_enter_safe_mode(adev); 5005 5006 /* Enable 3D CGCG/CGLS */ 5007 if (enable) { 5008 /* write cmd to clear cgcg/cgls ov */ 5009 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 5010 /* unset CGCG override */ 5011 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5012 /* update CGCG and CGLS override bits */ 5013 if (def != data) 5014 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5015 5016 /* enable 3Dcgcg FSM(0x0000363f) */ 5017 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 5018 5019 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 5020 data = (0x36 << 
RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5021 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5022 else 5023 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT; 5024 5025 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5026 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5027 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5028 if (def != data) 5029 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 5030 5031 /* set IDLE_POLL_COUNT(0x00900100) */ 5032 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5033 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5034 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5035 if (def != data) 5036 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5037 } else { 5038 /* Disable CGCG/CGLS */ 5039 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 5040 /* disable cgcg, cgls should be disabled */ 5041 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 5042 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 5043 /* disable cgcg and cgls in FSM */ 5044 if (def != data) 5045 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 5046 } 5047 5048 amdgpu_gfx_rlc_exit_safe_mode(adev); 5049 } 5050 5051 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5052 bool enable) 5053 { 5054 uint32_t def, data; 5055 5056 amdgpu_gfx_rlc_enter_safe_mode(adev); 5057 5058 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5059 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 5060 /* unset CGCG override */ 5061 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5062 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5063 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5064 else 5065 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5066 /* update CGCG and CGLS override bits */ 5067 if (def != data) 5068 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5069 5070 /* enable cgcg FSM(0x0000363F) */ 5071 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5072 5073 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) 5074 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5075 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5076 else 5077 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5078 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5079 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5080 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5081 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5082 if (def != data) 5083 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5084 5085 /* set IDLE_POLL_COUNT(0x00900100) */ 5086 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5087 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5088 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5089 if (def != data) 5090 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5091 } else { 5092 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5093 /* reset CGCG/CGLS bits */ 5094 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5095 /* disable cgcg and cgls in FSM */ 5096 if (def != data) 5097 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5098 } 5099 5100 amdgpu_gfx_rlc_exit_safe_mode(adev); 5101 } 5102 5103 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5104 bool enable) 5105 { 5106 if (enable) { 5107 /* CGCG/CGLS should be enabled after MGCG/MGLS 5108 * === MGCG + MGLS === 5109 */ 5110 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5111 /* === CGCG 
/CGLS for GFX 3D Only === */ 5112 gfx_v9_0_update_3d_clock_gating(adev, enable); 5113 /* === CGCG + CGLS === */ 5114 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5115 } else { 5116 /* CGCG/CGLS should be disabled before MGCG/MGLS 5117 * === CGCG + CGLS === 5118 */ 5119 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5120 /* === CGCG /CGLS for GFX 3D Only === */ 5121 gfx_v9_0_update_3d_clock_gating(adev, enable); 5122 /* === MGCG + MGLS === */ 5123 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5124 } 5125 return 0; 5126 } 5127 5128 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) 5129 { 5130 u32 reg, data; 5131 5132 amdgpu_gfx_off_ctrl(adev, false); 5133 5134 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); 5135 if (amdgpu_sriov_is_pp_one_vf(adev)) 5136 data = RREG32_NO_KIQ(reg); 5137 else 5138 data = RREG32(reg); 5139 5140 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; 5141 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5142 5143 if (amdgpu_sriov_is_pp_one_vf(adev)) 5144 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); 5145 else 5146 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); 5147 5148 amdgpu_gfx_off_ctrl(adev, true); 5149 } 5150 5151 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev, 5152 uint32_t offset, 5153 struct soc15_reg_rlcg *entries, int arr_size) 5154 { 5155 int i; 5156 uint32_t reg; 5157 5158 if (!entries) 5159 return false; 5160 5161 for (i = 0; i < arr_size; i++) { 5162 const struct soc15_reg_rlcg *entry; 5163 5164 entry = &entries[i]; 5165 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; 5166 if (offset == reg) 5167 return true; 5168 } 5169 5170 return false; 5171 } 5172 5173 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset) 5174 { 5175 return gfx_v9_0_check_rlcg_range(adev, offset, 5176 (void *)rlcg_access_gc_9_0, 5177 ARRAY_SIZE(rlcg_access_gc_9_0)); 5178 } 5179 5180 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 5181 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 5182 .set_safe_mode = gfx_v9_0_set_safe_mode, 5183 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 5184 .init = gfx_v9_0_rlc_init, 5185 .get_csb_size = gfx_v9_0_get_csb_size, 5186 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 5187 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 5188 .resume = gfx_v9_0_rlc_resume, 5189 .stop = gfx_v9_0_rlc_stop, 5190 .reset = gfx_v9_0_rlc_reset, 5191 .start = gfx_v9_0_rlc_start, 5192 .update_spm_vmid = gfx_v9_0_update_spm_vmid, 5193 .sriov_wreg = gfx_v9_0_sriov_wreg, 5194 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, 5195 }; 5196 5197 static int gfx_v9_0_set_powergating_state(void *handle, 5198 enum amd_powergating_state state) 5199 { 5200 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5201 bool enable = (state == AMD_PG_STATE_GATE); 5202 5203 switch (adev->ip_versions[GC_HWIP][0]) { 5204 case IP_VERSION(9, 2, 2): 5205 case IP_VERSION(9, 1, 0): 5206 case IP_VERSION(9, 3, 0): 5207 if (!enable) 5208 amdgpu_gfx_off_ctrl(adev, false); 5209 5210 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5211 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 5212 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 5213 } else { 5214 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 5215 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 5216 } 5217 5218 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5219 gfx_v9_0_enable_cp_power_gating(adev, true); 5220 else 5221 
gfx_v9_0_enable_cp_power_gating(adev, false); 5222 5223 /* update gfx cgpg state */ 5224 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 5225 5226 /* update mgcg state */ 5227 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 5228 5229 if (enable) 5230 amdgpu_gfx_off_ctrl(adev, true); 5231 break; 5232 case IP_VERSION(9, 2, 1): 5233 amdgpu_gfx_off_ctrl(adev, enable); 5234 break; 5235 default: 5236 break; 5237 } 5238 5239 return 0; 5240 } 5241 5242 static int gfx_v9_0_set_clockgating_state(void *handle, 5243 enum amd_clockgating_state state) 5244 { 5245 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5246 5247 if (amdgpu_sriov_vf(adev)) 5248 return 0; 5249 5250 switch (adev->ip_versions[GC_HWIP][0]) { 5251 case IP_VERSION(9, 0, 1): 5252 case IP_VERSION(9, 2, 1): 5253 case IP_VERSION(9, 4, 0): 5254 case IP_VERSION(9, 2, 2): 5255 case IP_VERSION(9, 1, 0): 5256 case IP_VERSION(9, 4, 1): 5257 case IP_VERSION(9, 3, 0): 5258 case IP_VERSION(9, 4, 2): 5259 gfx_v9_0_update_gfx_clock_gating(adev, 5260 state == AMD_CG_STATE_GATE); 5261 break; 5262 default: 5263 break; 5264 } 5265 return 0; 5266 } 5267 5268 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 5269 { 5270 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5271 int data; 5272 5273 if (amdgpu_sriov_vf(adev)) 5274 *flags = 0; 5275 5276 /* AMD_CG_SUPPORT_GFX_MGCG */ 5277 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 5278 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5279 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5280 5281 /* AMD_CG_SUPPORT_GFX_CGCG */ 5282 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 5283 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5284 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5285 5286 /* AMD_CG_SUPPORT_GFX_CGLS */ 5287 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5288 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5289 5290 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5291 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 5292 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5293 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5294 5295 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5296 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 5297 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5298 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5299 5300 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) { 5301 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5302 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 5303 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5304 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5305 5306 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5307 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5308 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5309 } 5310 } 5311 5312 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5313 { 5314 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 5315 } 5316 5317 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5318 { 5319 struct amdgpu_device *adev = ring->adev; 5320 u64 wptr; 5321 5322 /* XXX check if swapping is necessary on BE */ 5323 if (ring->use_doorbell) { 5324 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 5325 } else { 5326 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 5327 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 5328 } 5329 5330 return wptr; 5331 } 5332 5333 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5334 { 5335 struct amdgpu_device *adev = ring->adev; 5336 5337 if 
(ring->use_doorbell) { 5338 /* XXX check if swapping is necessary on BE */ 5339 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5340 WDOORBELL64(ring->doorbell_index, ring->wptr); 5341 } else { 5342 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 5343 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 5344 } 5345 } 5346 5347 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5348 { 5349 struct amdgpu_device *adev = ring->adev; 5350 u32 ref_and_mask, reg_mem_engine; 5351 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5352 5353 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5354 switch (ring->me) { 5355 case 1: 5356 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5357 break; 5358 case 2: 5359 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5360 break; 5361 default: 5362 return; 5363 } 5364 reg_mem_engine = 0; 5365 } else { 5366 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 5367 reg_mem_engine = 1; /* pfp */ 5368 } 5369 5370 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5371 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5372 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5373 ref_and_mask, ref_and_mask, 0x20); 5374 } 5375 5376 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5377 struct amdgpu_job *job, 5378 struct amdgpu_ib *ib, 5379 uint32_t flags) 5380 { 5381 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5382 u32 header, control = 0; 5383 5384 if (ib->flags & AMDGPU_IB_FLAG_CE) 5385 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5386 else 5387 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5388 5389 control |= ib->length_dw | (vmid << 24); 5390 5391 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5392 control |= INDIRECT_BUFFER_PRE_ENB(1); 5393 5394 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) 5395 gfx_v9_0_ring_emit_de_meta(ring); 5396 } 5397 5398 amdgpu_ring_write(ring, header); 5399 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5400 amdgpu_ring_write(ring, 5401 #ifdef __BIG_ENDIAN 5402 (2 << 0) | 5403 #endif 5404 lower_32_bits(ib->gpu_addr)); 5405 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5406 amdgpu_ring_write(ring, control); 5407 } 5408 5409 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5410 struct amdgpu_job *job, 5411 struct amdgpu_ib *ib, 5412 uint32_t flags) 5413 { 5414 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5415 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5416 5417 /* Currently, there is a high possibility to get wave ID mismatch 5418 * between ME and GDS, leading to a hw deadlock, because ME generates 5419 * different wave IDs than the GDS expects. This situation happens 5420 * randomly when at least 5 compute pipes use GDS ordered append. 5421 * The wave IDs generated by ME are also wrong after suspend/resume. 5422 * Those are probably bugs somewhere else in the kernel driver. 5423 * 5424 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5425 * GDS to 0 for this ring (me/pipe). 
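 * The reset below is only emitted when the submitter sets
 * AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID on the IB; it programs
 * mmGDS_COMPUTE_MAX_WAVE_ID with adev->gds.gds_compute_max_wave_id
 * through a SET_CONFIG_REG packet.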
5426 */ 5427 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5428 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5429 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 5430 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5431 } 5432 5433 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5434 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5435 amdgpu_ring_write(ring, 5436 #ifdef __BIG_ENDIAN 5437 (2 << 0) | 5438 #endif 5439 lower_32_bits(ib->gpu_addr)); 5440 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5441 amdgpu_ring_write(ring, control); 5442 } 5443 5444 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5445 u64 seq, unsigned flags) 5446 { 5447 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5448 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5449 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5450 5451 /* RELEASE_MEM - flush caches, send int */ 5452 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5453 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 5454 EOP_TC_NC_ACTION_EN) : 5455 (EOP_TCL1_ACTION_EN | 5456 EOP_TC_ACTION_EN | 5457 EOP_TC_WB_ACTION_EN | 5458 EOP_TC_MD_ACTION_EN)) | 5459 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5460 EVENT_INDEX(5))); 5461 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 5462 5463 /* 5464 * the address should be Qword aligned if 64bit write, Dword 5465 * aligned if only send 32bit data low (discard data high) 5466 */ 5467 if (write64bit) 5468 BUG_ON(addr & 0x7); 5469 else 5470 BUG_ON(addr & 0x3); 5471 amdgpu_ring_write(ring, lower_32_bits(addr)); 5472 amdgpu_ring_write(ring, upper_32_bits(addr)); 5473 amdgpu_ring_write(ring, lower_32_bits(seq)); 5474 amdgpu_ring_write(ring, upper_32_bits(seq)); 5475 amdgpu_ring_write(ring, 0); 5476 } 5477 5478 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5479 { 5480 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5481 uint32_t seq = ring->fence_drv.sync_seq; 5482 uint64_t addr = ring->fence_drv.gpu_addr; 5483 5484 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5485 lower_32_bits(addr), upper_32_bits(addr), 5486 seq, 0xffffffff, 4); 5487 } 5488 5489 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5490 unsigned vmid, uint64_t pd_addr) 5491 { 5492 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5493 5494 /* compute doesn't have PFP */ 5495 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5496 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5497 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5498 amdgpu_ring_write(ring, 0x0); 5499 } 5500 } 5501 5502 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5503 { 5504 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 5505 } 5506 5507 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5508 { 5509 u64 wptr; 5510 5511 /* XXX check if swapping is necessary on BE */ 5512 if (ring->use_doorbell) 5513 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 5514 else 5515 BUG(); 5516 return wptr; 5517 } 5518 5519 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5520 { 5521 struct amdgpu_device *adev = ring->adev; 5522 5523 /* XXX check if swapping is necessary on BE */ 5524 if (ring->use_doorbell) { 5525 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5526 WDOORBELL64(ring->doorbell_index, ring->wptr); 5527 } else{ 5528 BUG(); /* only DOORBELL 
method supported on gfx9 now */ 5529 } 5530 } 5531 5532 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5533 u64 seq, unsigned int flags) 5534 { 5535 struct amdgpu_device *adev = ring->adev; 5536 5537 /* we only allocate 32bit for each seq wb address */ 5538 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5539 5540 /* write fence seq to the "addr" */ 5541 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5542 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5543 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5544 amdgpu_ring_write(ring, lower_32_bits(addr)); 5545 amdgpu_ring_write(ring, upper_32_bits(addr)); 5546 amdgpu_ring_write(ring, lower_32_bits(seq)); 5547 5548 if (flags & AMDGPU_FENCE_FLAG_INT) { 5549 /* set register to trigger INT */ 5550 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5551 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5552 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5553 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5554 amdgpu_ring_write(ring, 0); 5555 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5556 } 5557 } 5558 5559 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5560 { 5561 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5562 amdgpu_ring_write(ring, 0); 5563 } 5564 5565 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 5566 { 5567 struct v9_ce_ib_state ce_payload = {0}; 5568 uint64_t csa_addr; 5569 int cnt; 5570 5571 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5572 csa_addr = amdgpu_csa_vaddr(ring->adev); 5573 5574 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5575 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5576 WRITE_DATA_DST_SEL(8) | 5577 WR_CONFIRM) | 5578 WRITE_DATA_CACHE_POLICY(0)); 5579 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5580 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5581 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 5582 } 5583 5584 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 5585 { 5586 struct v9_de_ib_state de_payload = {0}; 5587 uint64_t csa_addr, gds_addr; 5588 int cnt; 5589 5590 csa_addr = amdgpu_csa_vaddr(ring->adev); 5591 gds_addr = csa_addr + 4096; 5592 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 5593 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5594 5595 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5596 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5597 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5598 WRITE_DATA_DST_SEL(8) | 5599 WR_CONFIRM) | 5600 WRITE_DATA_CACHE_POLICY(0)); 5601 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5602 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5603 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 5604 } 5605 5606 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 5607 bool secure) 5608 { 5609 uint32_t v = secure ? FRAME_TMZ : 0; 5610 5611 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5612 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 
0 : 1)); 5613 } 5614 5615 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5616 { 5617 uint32_t dw2 = 0; 5618 5619 if (amdgpu_sriov_vf(ring->adev)) 5620 gfx_v9_0_ring_emit_ce_meta(ring); 5621 5622 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */ 5623 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5624 /* set load_global_config & load_global_uconfig */ 5625 dw2 |= 0x8001; 5626 /* set load_cs_sh_regs */ 5627 dw2 |= 0x01000000; 5628 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5629 dw2 |= 0x10002; 5630 5631 /* set load_ce_ram if a preamble is presented */ 5632 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5633 dw2 |= 0x10000000; 5634 } else { 5635 /* still load_ce_ram if this is the first time a preamble is presented, 5636 * even though no context switch happens. 5637 */ 5638 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5639 dw2 |= 0x10000000; 5640 } 5641 5642 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5643 amdgpu_ring_write(ring, dw2); 5644 amdgpu_ring_write(ring, 0); 5645 } 5646 5647 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 5648 { 5649 unsigned ret; 5650 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5651 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 5652 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 5653 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 5654 ret = ring->wptr & ring->buf_mask; 5655 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 5656 return ret; 5657 } 5658 5659 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 5660 { 5661 unsigned cur; 5662 BUG_ON(offset > ring->buf_mask); 5663 BUG_ON(ring->ring[offset] != 0x55aa55aa); 5664 5665 cur = (ring->wptr & ring->buf_mask) - 1; 5666 if (likely(cur > offset)) 5667 ring->ring[offset] = cur - offset; 5668 else 5669 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 5670 } 5671 5672 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 5673 uint32_t reg_val_offs) 5674 { 5675 struct amdgpu_device *adev = ring->adev; 5676 5677 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5678 amdgpu_ring_write(ring, 0 | /* src: register */ 5679 (5 << 8) | /* dst: memory */ 5680 (1 << 20)); /* write confirm */ 5681 amdgpu_ring_write(ring, reg); 5682 amdgpu_ring_write(ring, 0); 5683 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5684 reg_val_offs * 4)); 5685 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5686 reg_val_offs * 4)); 5687 } 5688 5689 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5690 uint32_t val) 5691 { 5692 uint32_t cmd = 0; 5693 5694 switch (ring->funcs->type) { 5695 case AMDGPU_RING_TYPE_GFX: 5696 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5697 break; 5698 case AMDGPU_RING_TYPE_KIQ: 5699 cmd = (1 << 16); /* no inc addr */ 5700 break; 5701 default: 5702 cmd = WR_CONFIRM; 5703 break; 5704 } 5705 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5706 amdgpu_ring_write(ring, cmd); 5707 amdgpu_ring_write(ring, reg); 5708 amdgpu_ring_write(ring, 0); 5709 amdgpu_ring_write(ring, val); 5710 } 5711 5712 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5713 uint32_t val, uint32_t mask) 5714 { 5715 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5716 } 5717 5718 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5719 uint32_t reg0, uint32_t
reg1, 5720 uint32_t ref, uint32_t mask) 5721 { 5722 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5723 struct amdgpu_device *adev = ring->adev; 5724 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 5725 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5726 5727 if (fw_version_ok) 5728 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5729 ref, mask, 0x20); 5730 else 5731 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5732 ref, mask); 5733 } 5734 5735 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5736 { 5737 struct amdgpu_device *adev = ring->adev; 5738 uint32_t value = 0; 5739 5740 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5741 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5742 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5743 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5744 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5745 } 5746 5747 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5748 enum amdgpu_interrupt_state state) 5749 { 5750 switch (state) { 5751 case AMDGPU_IRQ_STATE_DISABLE: 5752 case AMDGPU_IRQ_STATE_ENABLE: 5753 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5754 TIME_STAMP_INT_ENABLE, 5755 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5756 break; 5757 default: 5758 break; 5759 } 5760 } 5761 5762 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5763 int me, int pipe, 5764 enum amdgpu_interrupt_state state) 5765 { 5766 u32 mec_int_cntl, mec_int_cntl_reg; 5767 5768 /* 5769 * amdgpu controls only the first MEC. That's why this function only 5770 * handles the setting of interrupts for this specific MEC. All other 5771 * pipes' interrupts are set by amdkfd. 5772 */ 5773 5774 if (me == 1) { 5775 switch (pipe) { 5776 case 0: 5777 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5778 break; 5779 case 1: 5780 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5781 break; 5782 case 2: 5783 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5784 break; 5785 case 3: 5786 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5787 break; 5788 default: 5789 DRM_DEBUG("invalid pipe %d\n", pipe); 5790 return; 5791 } 5792 } else { 5793 DRM_DEBUG("invalid me %d\n", me); 5794 return; 5795 } 5796 5797 switch (state) { 5798 case AMDGPU_IRQ_STATE_DISABLE: 5799 mec_int_cntl = RREG32(mec_int_cntl_reg); 5800 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5801 TIME_STAMP_INT_ENABLE, 0); 5802 WREG32(mec_int_cntl_reg, mec_int_cntl); 5803 break; 5804 case AMDGPU_IRQ_STATE_ENABLE: 5805 mec_int_cntl = RREG32(mec_int_cntl_reg); 5806 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5807 TIME_STAMP_INT_ENABLE, 1); 5808 WREG32(mec_int_cntl_reg, mec_int_cntl); 5809 break; 5810 default: 5811 break; 5812 } 5813 } 5814 5815 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5816 struct amdgpu_irq_src *source, 5817 unsigned type, 5818 enum amdgpu_interrupt_state state) 5819 { 5820 switch (state) { 5821 case AMDGPU_IRQ_STATE_DISABLE: 5822 case AMDGPU_IRQ_STATE_ENABLE: 5823 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5824 PRIV_REG_INT_ENABLE, 5825 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 5826 break; 5827 default: 5828 break; 5829 } 5830 5831 return 0; 5832 } 5833 5834 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5835 struct amdgpu_irq_src *source, 5836 unsigned type, 5837 enum amdgpu_interrupt_state state) 5838 { 5839 switch (state) { 5840 case AMDGPU_IRQ_STATE_DISABLE: 5841 case AMDGPU_IRQ_STATE_ENABLE: 5842 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5843 PRIV_INSTR_INT_ENABLE, 5844 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5845 break; 5846 default: 5847 break; 5848 } 5849 5850 return 0; 5851 } 5852 5853 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 5854 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5855 CP_ECC_ERROR_INT_ENABLE, 1) 5856 5857 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 5858 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5859 CP_ECC_ERROR_INT_ENABLE, 0) 5860 5861 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 5862 struct amdgpu_irq_src *source, 5863 unsigned type, 5864 enum amdgpu_interrupt_state state) 5865 { 5866 switch (state) { 5867 case AMDGPU_IRQ_STATE_DISABLE: 5868 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5869 CP_ECC_ERROR_INT_ENABLE, 0); 5870 DISABLE_ECC_ON_ME_PIPE(1, 0); 5871 DISABLE_ECC_ON_ME_PIPE(1, 1); 5872 DISABLE_ECC_ON_ME_PIPE(1, 2); 5873 DISABLE_ECC_ON_ME_PIPE(1, 3); 5874 break; 5875 5876 case AMDGPU_IRQ_STATE_ENABLE: 5877 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5878 CP_ECC_ERROR_INT_ENABLE, 1); 5879 ENABLE_ECC_ON_ME_PIPE(1, 0); 5880 ENABLE_ECC_ON_ME_PIPE(1, 1); 5881 ENABLE_ECC_ON_ME_PIPE(1, 2); 5882 ENABLE_ECC_ON_ME_PIPE(1, 3); 5883 break; 5884 default: 5885 break; 5886 } 5887 5888 return 0; 5889 } 5890 5891 5892 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5893 struct amdgpu_irq_src *src, 5894 unsigned type, 5895 enum amdgpu_interrupt_state state) 5896 { 5897 switch (type) { 5898 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5899 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5900 break; 5901 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5902 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5903 break; 5904 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5905 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5906 break; 5907 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5908 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5909 break; 5910 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5911 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5912 break; 5913 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5914 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5915 break; 5916 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5917 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5918 break; 5919 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5920 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5921 break; 5922 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5923 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5924 break; 5925 default: 5926 break; 5927 } 5928 return 0; 5929 } 5930 5931 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5932 struct amdgpu_irq_src *source, 5933 struct amdgpu_iv_entry *entry) 5934 { 5935 int i; 5936 u8 me_id, pipe_id, queue_id; 5937 struct amdgpu_ring *ring; 5938 5939 DRM_DEBUG("IH: CP EOP\n"); 5940 me_id = (entry->ring_id & 0x0c) >> 2; 5941 pipe_id = (entry->ring_id & 0x03) >> 0; 5942 queue_id = (entry->ring_id & 0x70) >> 4; 5943 5944 switch (me_id) { 5945 case 0: 5946 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5947 break; 5948 case 1: 5949 case 
2: 5950 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5951 ring = &adev->gfx.compute_ring[i]; 5952 /* Per-queue interrupt is supported for MEC starting from VI. 5953 * The interrupt can only be enabled/disabled per pipe instead of per queue. 5954 */ 5955 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 5956 amdgpu_fence_process(ring); 5957 } 5958 break; 5959 } 5960 return 0; 5961 } 5962 5963 static void gfx_v9_0_fault(struct amdgpu_device *adev, 5964 struct amdgpu_iv_entry *entry) 5965 { 5966 u8 me_id, pipe_id, queue_id; 5967 struct amdgpu_ring *ring; 5968 int i; 5969 5970 me_id = (entry->ring_id & 0x0c) >> 2; 5971 pipe_id = (entry->ring_id & 0x03) >> 0; 5972 queue_id = (entry->ring_id & 0x70) >> 4; 5973 5974 switch (me_id) { 5975 case 0: 5976 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 5977 break; 5978 case 1: 5979 case 2: 5980 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5981 ring = &adev->gfx.compute_ring[i]; 5982 if (ring->me == me_id && ring->pipe == pipe_id && 5983 ring->queue == queue_id) 5984 drm_sched_fault(&ring->sched); 5985 } 5986 break; 5987 } 5988 } 5989 5990 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 5991 struct amdgpu_irq_src *source, 5992 struct amdgpu_iv_entry *entry) 5993 { 5994 DRM_ERROR("Illegal register access in command stream\n"); 5995 gfx_v9_0_fault(adev, entry); 5996 return 0; 5997 } 5998 5999 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 6000 struct amdgpu_irq_src *source, 6001 struct amdgpu_iv_entry *entry) 6002 { 6003 DRM_ERROR("Illegal instruction in command stream\n"); 6004 gfx_v9_0_fault(adev, entry); 6005 return 0; 6006 } 6007 6008 6009 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { 6010 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 6011 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 6012 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) 6013 }, 6014 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 6015 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), 6016 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) 6017 }, 6018 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6019 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), 6020 0, 0 6021 }, 6022 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6023 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), 6024 0, 0 6025 }, 6026 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 6027 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), 6028 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) 6029 }, 6030 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6031 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), 6032 0, 0 6033 }, 6034 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6035 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 6036 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) 6037 }, 6038 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 6039 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), 6040 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) 6041 }, 6042 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 6043 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), 6044 0, 0 6045 }, 6046 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 6047 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), 6048 0, 0 6049 }, 6050 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 6051 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), 6052 0, 0 6053 }, 6054 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6055 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), 6056 
SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) 6057 }, 6058 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6059 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 6060 0, 0 6061 }, 6062 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6063 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 6064 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) 6065 }, 6066 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 6067 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6068 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 6069 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) 6070 }, 6071 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 6072 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6073 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 6074 0, 0 6075 }, 6076 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 6077 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6078 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 6079 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) 6080 }, 6081 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 6082 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6083 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 6084 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) 6085 }, 6086 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 6087 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6088 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 6089 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) 6090 }, 6091 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 6092 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6093 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 6094 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) 6095 }, 6096 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 6097 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 6098 0, 0 6099 }, 6100 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6101 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 6102 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) 6103 }, 6104 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6105 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 6106 0, 0 6107 }, 6108 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6109 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 6110 0, 0 6111 }, 6112 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6113 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 6114 0, 0 6115 }, 6116 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6117 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 6118 0, 0 6119 }, 6120 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6121 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 6122 0, 0 6123 }, 6124 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6125 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 6126 0, 0 6127 }, 6128 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6129 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 6130 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) 6131 }, 6132 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6133 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 6134 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) 6135 }, 6136 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6137 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 6138 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) 6139 }, 6140 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6141 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 6142 
SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) 6143 }, 6144 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6145 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 6146 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) 6147 }, 6148 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6149 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 6150 0, 0 6151 }, 6152 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6153 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 6154 0, 0 6155 }, 6156 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6157 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 6158 0, 0 6159 }, 6160 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6161 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 6162 0, 0 6163 }, 6164 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6165 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 6166 0, 0 6167 }, 6168 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6169 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 6170 0, 0 6171 }, 6172 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6173 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 6174 0, 0 6175 }, 6176 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6177 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 6178 0, 0 6179 }, 6180 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6181 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 6182 0, 0 6183 }, 6184 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6185 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 6186 0, 0 6187 }, 6188 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6189 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 6190 0, 0 6191 }, 6192 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6193 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 6194 0, 0 6195 }, 6196 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6197 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 6198 0, 0 6199 }, 6200 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 6201 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 6202 0, 0 6203 }, 6204 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6205 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 6206 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) 6207 }, 6208 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6209 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 6210 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) 6211 }, 6212 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6213 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 6214 0, 0 6215 }, 6216 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6217 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 6218 0, 0 6219 }, 6220 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6221 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 6222 0, 0 6223 }, 6224 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6225 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 6226 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) 6227 }, 6228 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6229 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 6230 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) 
6231 }, 6232 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6233 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 6234 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) 6235 }, 6236 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6237 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 6238 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) 6239 }, 6240 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6241 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), 6242 0, 0 6243 }, 6244 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6245 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), 6246 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) 6247 }, 6248 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6249 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), 6250 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) 6251 }, 6252 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6253 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), 6254 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) 6255 }, 6256 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6257 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), 6258 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) 6259 }, 6260 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6261 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), 6262 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) 6263 }, 6264 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6265 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), 6266 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) 6267 }, 6268 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6269 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), 6270 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) 6271 }, 6272 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6273 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 6274 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) 6275 }, 6276 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6277 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 6278 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) 6279 }, 6280 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6281 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 6282 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) 6283 }, 6284 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6285 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 6286 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) 6287 }, 6288 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6289 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 6290 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) 6291 }, 6292 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6293 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 6294 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) 6295 }, 6296 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6297 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 6298 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) 6299 }, 6300 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6301 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 6302 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) 6303 }, 6304 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6305 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 6306 
SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) 6307 }, 6308 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6309 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 6310 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) 6311 }, 6312 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6313 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 6314 0, 0 6315 }, 6316 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6317 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 6318 0, 0 6319 }, 6320 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6321 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 6322 0, 0 6323 }, 6324 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6325 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 6326 0, 0 6327 }, 6328 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6329 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 6330 0, 0 6331 }, 6332 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6333 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 6334 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) 6335 }, 6336 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6337 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 6338 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) 6339 }, 6340 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6341 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 6342 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) 6343 }, 6344 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6345 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 6346 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) 6347 }, 6348 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6349 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 6350 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) 6351 }, 6352 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6353 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 6354 0, 0 6355 }, 6356 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6357 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 6358 0, 0 6359 }, 6360 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6361 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 6362 0, 0 6363 }, 6364 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6365 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 6366 0, 0 6367 }, 6368 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6369 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 6370 0, 0 6371 }, 6372 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6373 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 6374 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) 6375 }, 6376 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6377 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 6378 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) 6379 }, 6380 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6381 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 6382 SOC15_REG_FIELD(GCEA_EDC_CNT, 
DRAMWR_DATAMEM_DED_COUNT) 6383 }, 6384 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6385 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 6386 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) 6387 }, 6388 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6389 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 6390 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) 6391 }, 6392 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6393 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 6394 0, 0 6395 }, 6396 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6397 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 6398 0, 0 6399 }, 6400 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6401 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 6402 0, 0 6403 }, 6404 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6405 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 6406 0, 0 6407 }, 6408 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6409 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 6410 0, 0 6411 }, 6412 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6413 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 6414 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) 6415 }, 6416 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6417 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 6418 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) 6419 }, 6420 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6421 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 6422 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) 6423 }, 6424 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6425 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 6426 0, 0 6427 }, 6428 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6429 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 6430 0, 0 6431 }, 6432 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6433 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 6434 0, 0 6435 }, 6436 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6437 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 6438 0, 0 6439 }, 6440 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6441 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 6442 0, 0 6443 }, 6444 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6445 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 6446 0, 0 6447 } 6448 }; 6449 6450 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 6451 void *inject_if) 6452 { 6453 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 6454 int ret; 6455 struct ta_ras_trigger_error_input block_info = { 0 }; 6456 6457 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6458 return -EINVAL; 6459 6460 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 6461 return -EINVAL; 6462 6463 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 6464 return -EPERM; 6465 6466 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 6467 info->head.type)) { 6468 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", 6469 ras_gfx_subblocks[info->head.sub_block_index].name, 6470 info->head.type); 6471 return -EPERM; 6472 } 6473 6474 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 6475 info->head.type)) { 6476 DRM_ERROR("GFX Subblock %s, driver do not support type 
0x%x\n", 6477 ras_gfx_subblocks[info->head.sub_block_index].name, 6478 info->head.type); 6479 return -EPERM; 6480 } 6481 6482 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 6483 block_info.sub_block_index = 6484 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 6485 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 6486 block_info.address = info->address; 6487 block_info.value = info->value; 6488 6489 mutex_lock(&adev->grbm_idx_mutex); 6490 ret = psp_ras_trigger_error(&adev->psp, &block_info); 6491 mutex_unlock(&adev->grbm_idx_mutex); 6492 6493 return ret; 6494 } 6495 6496 static const char *vml2_mems[] = { 6497 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", 6498 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", 6499 "UTC_VML2_BANK_CACHE_0_4K_MEM0", 6500 "UTC_VML2_BANK_CACHE_0_4K_MEM1", 6501 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", 6502 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", 6503 "UTC_VML2_BANK_CACHE_1_4K_MEM0", 6504 "UTC_VML2_BANK_CACHE_1_4K_MEM1", 6505 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", 6506 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", 6507 "UTC_VML2_BANK_CACHE_2_4K_MEM0", 6508 "UTC_VML2_BANK_CACHE_2_4K_MEM1", 6509 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", 6510 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", 6511 "UTC_VML2_BANK_CACHE_3_4K_MEM0", 6512 "UTC_VML2_BANK_CACHE_3_4K_MEM1", 6513 }; 6514 6515 static const char *vml2_walker_mems[] = { 6516 "UTC_VML2_CACHE_PDE0_MEM0", 6517 "UTC_VML2_CACHE_PDE0_MEM1", 6518 "UTC_VML2_CACHE_PDE1_MEM0", 6519 "UTC_VML2_CACHE_PDE1_MEM1", 6520 "UTC_VML2_CACHE_PDE2_MEM0", 6521 "UTC_VML2_CACHE_PDE2_MEM1", 6522 "UTC_VML2_RDIF_LOG_FIFO", 6523 }; 6524 6525 static const char *atc_l2_cache_2m_mems[] = { 6526 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", 6527 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", 6528 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", 6529 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", 6530 }; 6531 6532 static const char *atc_l2_cache_4k_mems[] = { 6533 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", 6534 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", 6535 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", 6536 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", 6537 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", 6538 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", 6539 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", 6540 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", 6541 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", 6542 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", 6543 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", 6544 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", 6545 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", 6546 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", 6547 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", 6548 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", 6549 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", 6550 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", 6551 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", 6552 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", 6553 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", 6554 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", 6555 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", 6556 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", 6557 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", 6558 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", 6559 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", 6560 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", 6561 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", 6562 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", 6563 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", 6564 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", 6565 }; 6566 6567 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, 6568 struct ras_err_data *err_data) 6569 { 6570 uint32_t i, data; 6571 uint32_t sec_count, ded_count; 6572 6573 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6574 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6575 
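	/* also reset the walker and ATC L2 EDC index/count registers before sampling them below */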
WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6576 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6577 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6578 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6579 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6580 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6581 6582 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6583 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6584 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6585 6586 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); 6587 if (sec_count) { 6588 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6589 "SEC %d\n", i, vml2_mems[i], sec_count); 6590 err_data->ce_count += sec_count; 6591 } 6592 6593 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); 6594 if (ded_count) { 6595 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6596 "DED %d\n", i, vml2_mems[i], ded_count); 6597 err_data->ue_count += ded_count; 6598 } 6599 } 6600 6601 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6602 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6603 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6604 6605 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6606 SEC_COUNT); 6607 if (sec_count) { 6608 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6609 "SEC %d\n", i, vml2_walker_mems[i], sec_count); 6610 err_data->ce_count += sec_count; 6611 } 6612 6613 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6614 DED_COUNT); 6615 if (ded_count) { 6616 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6617 "DED %d\n", i, vml2_walker_mems[i], ded_count); 6618 err_data->ue_count += ded_count; 6619 } 6620 } 6621 6622 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6623 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6624 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6625 6626 sec_count = (data & 0x00006000L) >> 0xd; 6627 if (sec_count) { 6628 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6629 "SEC %d\n", i, atc_l2_cache_2m_mems[i], 6630 sec_count); 6631 err_data->ce_count += sec_count; 6632 } 6633 } 6634 6635 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6636 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6637 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6638 6639 sec_count = (data & 0x00006000L) >> 0xd; 6640 if (sec_count) { 6641 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6642 "SEC %d\n", i, atc_l2_cache_4k_mems[i], 6643 sec_count); 6644 err_data->ce_count += sec_count; 6645 } 6646 6647 ded_count = (data & 0x00018000L) >> 0xf; 6648 if (ded_count) { 6649 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6650 "DED %d\n", i, atc_l2_cache_4k_mems[i], 6651 ded_count); 6652 err_data->ue_count += ded_count; 6653 } 6654 } 6655 6656 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6657 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6658 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6659 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6660 6661 return 0; 6662 } 6663 6664 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev, 6665 const struct soc15_reg_entry *reg, 6666 uint32_t se_id, uint32_t inst_id, uint32_t value, 6667 uint32_t *sec_count, uint32_t *ded_count) 6668 { 6669 uint32_t i; 6670 uint32_t sec_cnt, ded_cnt; 6671 6672 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) { 6673 if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset || 6674 gfx_v9_0_ras_fields[i].seg != reg->seg || 6675 gfx_v9_0_ras_fields[i].inst != 
reg->inst) 6676 continue; 6677 6678 sec_cnt = (value & 6679 gfx_v9_0_ras_fields[i].sec_count_mask) >> 6680 gfx_v9_0_ras_fields[i].sec_count_shift; 6681 if (sec_cnt) { 6682 dev_info(adev->dev, "GFX SubBlock %s, " 6683 "Instance[%d][%d], SEC %d\n", 6684 gfx_v9_0_ras_fields[i].name, 6685 se_id, inst_id, 6686 sec_cnt); 6687 *sec_count += sec_cnt; 6688 } 6689 6690 ded_cnt = (value & 6691 gfx_v9_0_ras_fields[i].ded_count_mask) >> 6692 gfx_v9_0_ras_fields[i].ded_count_shift; 6693 if (ded_cnt) { 6694 dev_info(adev->dev, "GFX SubBlock %s, " 6695 "Instance[%d][%d], DED %d\n", 6696 gfx_v9_0_ras_fields[i].name, 6697 se_id, inst_id, 6698 ded_cnt); 6699 *ded_count += ded_cnt; 6700 } 6701 } 6702 6703 return 0; 6704 } 6705 6706 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) 6707 { 6708 int i, j, k; 6709 6710 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6711 return; 6712 6713 /* read back registers to clear the counters */ 6714 mutex_lock(&adev->grbm_idx_mutex); 6715 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 6716 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 6717 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 6718 gfx_v9_0_select_se_sh(adev, j, 0x0, k); 6719 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 6720 } 6721 } 6722 } 6723 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 6724 mutex_unlock(&adev->grbm_idx_mutex); 6725 6726 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6727 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6728 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6729 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6730 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6731 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6732 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6733 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6734 6735 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6736 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6737 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6738 } 6739 6740 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6741 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6742 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6743 } 6744 6745 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6746 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6747 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6748 } 6749 6750 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6751 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6752 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6753 } 6754 6755 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6756 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6757 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6758 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6759 } 6760 6761 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 6762 void *ras_error_status) 6763 { 6764 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 6765 uint32_t sec_count = 0, ded_count = 0; 6766 uint32_t i, j, k; 6767 uint32_t reg_value; 6768 6769 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6770 return -EINVAL; 6771 6772 err_data->ue_count = 0; 6773 err_data->ce_count = 0; 6774 6775 mutex_lock(&adev->grbm_idx_mutex); 6776 6777 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 6778 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 6779 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 6780 
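				/* point GRBM at this SE/instance before reading its EDC counter */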
				gfx_v9_0_select_se_sh(adev, j, 0, k);
				reg_value =
					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
				if (reg_value)
					gfx_v9_0_ras_error_count(adev,
						&gfx_v9_0_edc_counter_regs[i],
						j, k, reg_value,
						&sec_count, &ded_count);
			}
		}
	}

	err_data->ce_count += sec_count;
	err_data->ue_count += ded_count;

	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_query_utc_edc_status(adev, err_data);

	return 0;
}

static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	const unsigned int cp_coher_cntl =
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);

	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xffffff);      /* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A);    /* POLL_INTERVAL */
}

static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
					uint32_t pipe, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t val;
	uint32_t wcl_cs_reg;

	/* the mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;

	switch (pipe) {
	case 0:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
		break;
	case 1:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
		break;
	case 2:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
		break;
	case 3:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
		break;
	default:
		DRM_DEBUG("invalid pipe %d\n", pipe);
		return;
	}

	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
}

static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t val;
	int i;

	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
	 * limit the number of gfx waves. Setting the lower 5 bits (0x1f)
	 * makes sure gfx only gets around 25% of the GPU resources.
	 */
	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
	amdgpu_ring_emit_wreg(ring,
			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
			      val);

	/* Restrict waves for normal/low priority compute queues as well
	 * to get the best QoS for high priority compute jobs.
	 *
	 * amdgpu controls only the first ME (CS pipes 0-3).
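	 * Only those pipes have their wave limits updated in the loop below.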
6873 */ 6874 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 6875 if (i != ring->pipe) 6876 gfx_v9_0_emit_wave_limit_cs(ring, i, enable); 6877 6878 } 6879 } 6880 6881 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 6882 .name = "gfx_v9_0", 6883 .early_init = gfx_v9_0_early_init, 6884 .late_init = gfx_v9_0_late_init, 6885 .sw_init = gfx_v9_0_sw_init, 6886 .sw_fini = gfx_v9_0_sw_fini, 6887 .hw_init = gfx_v9_0_hw_init, 6888 .hw_fini = gfx_v9_0_hw_fini, 6889 .suspend = gfx_v9_0_suspend, 6890 .resume = gfx_v9_0_resume, 6891 .is_idle = gfx_v9_0_is_idle, 6892 .wait_for_idle = gfx_v9_0_wait_for_idle, 6893 .soft_reset = gfx_v9_0_soft_reset, 6894 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 6895 .set_powergating_state = gfx_v9_0_set_powergating_state, 6896 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 6897 }; 6898 6899 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 6900 .type = AMDGPU_RING_TYPE_GFX, 6901 .align_mask = 0xff, 6902 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6903 .support_64bit_ptrs = true, 6904 .vmhub = AMDGPU_GFXHUB_0, 6905 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 6906 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 6907 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 6908 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 6909 5 + /* COND_EXEC */ 6910 7 + /* PIPELINE_SYNC */ 6911 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6912 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6913 2 + /* VM_FLUSH */ 6914 8 + /* FENCE for VM_FLUSH */ 6915 20 + /* GDS switch */ 6916 4 + /* double SWITCH_BUFFER, 6917 the first COND_EXEC jump to the place just 6918 prior to this double SWITCH_BUFFER */ 6919 5 + /* COND_EXEC */ 6920 7 + /* HDP_flush */ 6921 4 + /* VGT_flush */ 6922 14 + /* CE_META */ 6923 31 + /* DE_META */ 6924 3 + /* CNTX_CTRL */ 6925 5 + /* HDP_INVL */ 6926 8 + 8 + /* FENCE x2 */ 6927 2 + /* SWITCH_BUFFER */ 6928 7, /* gfx_v9_0_emit_mem_sync */ 6929 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 6930 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 6931 .emit_fence = gfx_v9_0_ring_emit_fence, 6932 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6933 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6934 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6935 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6936 .test_ring = gfx_v9_0_ring_test_ring, 6937 .test_ib = gfx_v9_0_ring_test_ib, 6938 .insert_nop = amdgpu_ring_insert_nop, 6939 .pad_ib = amdgpu_ring_generic_pad_ib, 6940 .emit_switch_buffer = gfx_v9_ring_emit_sb, 6941 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 6942 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 6943 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 6944 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 6945 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6946 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6947 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6948 .soft_recovery = gfx_v9_0_ring_soft_recovery, 6949 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 6950 }; 6951 6952 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 6953 .type = AMDGPU_RING_TYPE_COMPUTE, 6954 .align_mask = 0xff, 6955 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6956 .support_64bit_ptrs = true, 6957 .vmhub = AMDGPU_GFXHUB_0, 6958 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6959 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6960 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6961 .emit_frame_size = 6962 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6963 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6964 5 + /* hdp invalidate */ 6965 7 + /* 
gfx_v9_0_ring_emit_pipeline_sync */ 6966 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6967 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6968 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6969 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 6970 7 + /* gfx_v9_0_emit_mem_sync */ 6971 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ 6972 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ 6973 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6974 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 6975 .emit_fence = gfx_v9_0_ring_emit_fence, 6976 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6977 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6978 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6979 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6980 .test_ring = gfx_v9_0_ring_test_ring, 6981 .test_ib = gfx_v9_0_ring_test_ib, 6982 .insert_nop = amdgpu_ring_insert_nop, 6983 .pad_ib = amdgpu_ring_generic_pad_ib, 6984 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6985 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6986 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6987 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 6988 .emit_wave_limit = gfx_v9_0_emit_wave_limit, 6989 }; 6990 6991 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 6992 .type = AMDGPU_RING_TYPE_KIQ, 6993 .align_mask = 0xff, 6994 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6995 .support_64bit_ptrs = true, 6996 .vmhub = AMDGPU_GFXHUB_0, 6997 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6998 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6999 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 7000 .emit_frame_size = 7001 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7002 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 7003 5 + /* hdp invalidate */ 7004 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 7005 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7006 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7007 2 + /* gfx_v9_0_ring_emit_vm_flush */ 7008 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7009 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 7010 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 7011 .test_ring = gfx_v9_0_ring_test_ring, 7012 .insert_nop = amdgpu_ring_insert_nop, 7013 .pad_ib = amdgpu_ring_generic_pad_ib, 7014 .emit_rreg = gfx_v9_0_ring_emit_rreg, 7015 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7016 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7017 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7018 }; 7019 7020 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 7021 { 7022 int i; 7023 7024 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq; 7025 7026 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7027 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; 7028 7029 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7030 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; 7031 } 7032 7033 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { 7034 .set = gfx_v9_0_set_eop_interrupt_state, 7035 .process = gfx_v9_0_eop_irq, 7036 }; 7037 7038 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { 7039 .set = gfx_v9_0_set_priv_reg_fault_state, 7040 .process = gfx_v9_0_priv_reg_irq, 7041 }; 7042 7043 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { 7044 .set = gfx_v9_0_set_priv_inst_fault_state, 7045 .process = gfx_v9_0_priv_inst_irq, 7046 }; 7047 7048 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { 7049 .set = gfx_v9_0_set_cp_ecc_error_state, 7050 
	.process = amdgpu_gfx_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 4, 1):
	case IP_VERSION(9, 3, 0):
	case IP_VERSION(9, 4, 2):
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
		adev->gds.gds_size = 0x10000;
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 4, 1):
		adev->gds.gds_size = 0x1000;
		break;
	case IP_VERSION(9, 4, 2):
		/* aldebaran removed all the GDS internal memory;
		 * only GWS opcodes such as barrier and semaphore
		 * are supported in the kernel.
		 */
		adev->gds.gds_size = 0;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 4, 0):
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case IP_VERSION(9, 2, 1):
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case IP_VERSION(9, 4, 1):
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	case IP_VERSION(9, 4, 2):
		/* deprecated for Aldebaran, no usage at all */
		adev->gds.gds_compute_max_wave_id = 0;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
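	/* a CU is active unless it is marked inactive in either the
	 * fused (CC) or the user (GC_USER) shader array config
	 */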
	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 comes from the 4 * 4 bitmap array size, which covers all gfx9 ASICs
	 */
	if (adev->gfx.config.max_shader_engines *
		adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
			 * is a 4x4 array, which suits Vega ASICs and their
			 * 4*2 SE/SH layout.
			 * Arcturus, however, uses an 8*1 SE/SH layout.
			 * To minimize the impact, we map it onto the existing
			 * bitmap array as follows:
			 * SE4,SH0 --> bitmap[0][1]
			 * SE5,SH0 --> bitmap[1][1]
			 * SE6,SH0 --> bitmap[2][1]
			 * SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[i % 4][j + i / 4] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};