/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP 0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX 0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT 0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_1_ARCT 0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_2_ARCT 0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_3_ARCT 0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_4_ARCT 0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0

enum ta_ras_gfx_subblock {
	/*CPC*/
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF*/
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG*/
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS*/
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI*/
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges)*/
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0*/
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

/*
 * Build one ras_gfx_subblock table entry: the subblock name and its TA RAS
 * index, with flags a-d packed into hw_supported_error_type (bits 0-3) and
 * e-h packed into sw_supported_error_type.
 */
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
	[AMDGPU_RAS_BLOCK__##subblock] = {	\
		#subblock,			\
		TA_RAS_BLOCK__##subblock,	\
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),	\
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),	\
	}

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

/*
 * Indirect GC register write via the RLC: the value is placed in
 * SCRATCH_REG0, the register offset (with bit 31 set as a pending flag)
 * in SCRATCH_REG1, then RLC_SPARE_INT is written and SCRATCH_REG1 is
 * polled until bit 31 clears.  GRBM_GFX_CNTL/GRBM_GFX_INDEX are instead
 * mirrored into SCRATCH_REG2/SCRATCH_REG3 and written directly.
 */
static void gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
{
	static void *scratch_reg0;
	static void *scratch_reg1;
	static void *scratch_reg2;
	static void *scratch_reg3;
	static void *spare_int;
	static uint32_t grbm_cntl;
	static uint32_t grbm_idx;

	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
	scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
	scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
	scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;

	if (amdgpu_sriov_runtime(adev)) {
		pr_err("shouldn't call rlcg write register during runtime\n");
		return;
	}

	if (offset == grbm_cntl || offset == grbm_idx) {
		if (offset == grbm_cntl)
			writel(v, scratch_reg2);
		else if (offset == grbm_idx)
			writel(v, scratch_reg3);

		writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
	} else {
		uint32_t i = 0;
		uint32_t retries = 50000;

		writel(v, scratch_reg0);
		writel(offset | 0x80000000, scratch_reg1);
		writel(1, spare_int);
		for (i = 0; i < retries; i++) {
			u32 tmp;

			tmp = readl(scratch_reg1);
			if (!(tmp & 0x80000000))
				break;

			udelay(10);
		}
		if (i >= retries)
			pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
	}

}

static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
{
	if (amdgpu_sriov_fullaccess(adev)) {
		gfx_v9_0_rlcg_rw(adev, offset, v, flag);

		return;
	}

	if (flag & AMDGPU_REGS_NO_KIQ)
		WREG32_NO_KIQ(offset, v);
	else
		WREG32(offset, v);
}

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);

static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
		PACKET3_SET_RESOURCES_VMID_MASK(0) |
		/* vmid_mask:0 queue_type:0 (KIQ) */
		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			 /*queue_type: normal compute queue */
			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			 /* alloc format: all_on_one_pipe */
			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			 /* num_queues: must be 1 */
			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				uint16_t pasid, uint32_t flush_type,
				bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_ARCTURUS:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case CHIP_RENOIR:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, the common golden settings are not needed */
	case CHIP_ALDEBARAN:
		gfx_v9_4_2_init_golden_registers(adev,
						 adev->smuio.funcs->get_die_id(adev));
		break;
	default:
		break;
	}

	if ((adev->asic_type != CHIP_ARCTURUS) &&
	    (adev->asic_type != CHIP_ALDEBARAN))
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16,
			  AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	if ((adev->asic_type != CHIP_ARCTURUS) &&
	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
	    (adev->gfx.mec_feature_version < 46) ||
	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
	    (adev->gfx.pfp_feature_version < 46)))
		DRM_WARN_ONCE("CP firmware version too old, please update!");

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		adev->gfx.me_fw_write_wait = true;
		adev->gfx.mec_fw_write_wait = true;
		break;
	}
}

struct amdgpu_gfxoff_quirk {
	u16 chip_vendor;
	u16 chip_device;
	u16 subsys_vendor;
	u16 subsys_device;
	u8 revision;
};

static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
	{ 0, 0, 0, 0, 0 },
};

static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
{
	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;

	while (p && p->chip_device != 0) {
		if (pdev->vendor == p->chip_vendor &&
		    pdev->device == p->chip_device &&
		    pdev->subsystem_vendor == p->subsys_vendor &&
		    pdev->subsystem_device == p->subsys_device &&
		    pdev->revision == p->revision) {
			return true;
		}
		++p;
	}
	return false;
}

static bool is_raven_kicker(struct amdgpu_device *adev)
{
	if (adev->pm.fw_version >= 0x41e2b)
		return true;
	else
		return false;
}

static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
		    ((!is_raven_kicker(adev) &&
		      adev->gfx.rlc_fw_version < 531) ||
		     (adev->gfx.rlc_feature_version < 1) ||
		     !adev->gfx.rlc.is_rlc_v2_1))
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
	case CHIP_RENOIR:
		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
1326 default: 1327 break; 1328 } 1329 } 1330 1331 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, 1332 const char *chip_name) 1333 { 1334 char fw_name[30]; 1335 int err; 1336 struct amdgpu_firmware_info *info = NULL; 1337 const struct common_firmware_header *header = NULL; 1338 const struct gfx_firmware_header_v1_0 *cp_hdr; 1339 1340 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 1341 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1342 if (err) 1343 goto out; 1344 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 1345 if (err) 1346 goto out; 1347 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1348 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1349 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1350 1351 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 1352 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1353 if (err) 1354 goto out; 1355 err = amdgpu_ucode_validate(adev->gfx.me_fw); 1356 if (err) 1357 goto out; 1358 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1359 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1360 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1361 1362 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 1363 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1364 if (err) 1365 goto out; 1366 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 1367 if (err) 1368 goto out; 1369 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1370 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1371 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1372 1373 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1374 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 1375 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 1376 info->fw = adev->gfx.pfp_fw; 1377 header = (const struct common_firmware_header *)info->fw->data; 1378 adev->firmware.fw_size += 1379 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1380 1381 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1382 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1383 info->fw = adev->gfx.me_fw; 1384 header = (const struct common_firmware_header *)info->fw->data; 1385 adev->firmware.fw_size += 1386 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1387 1388 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1389 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1390 info->fw = adev->gfx.ce_fw; 1391 header = (const struct common_firmware_header *)info->fw->data; 1392 adev->firmware.fw_size += 1393 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1394 } 1395 1396 out: 1397 if (err) { 1398 dev_err(adev->dev, 1399 "gfx9: Failed to load firmware \"%s\"\n", 1400 fw_name); 1401 release_firmware(adev->gfx.pfp_fw); 1402 adev->gfx.pfp_fw = NULL; 1403 release_firmware(adev->gfx.me_fw); 1404 adev->gfx.me_fw = NULL; 1405 release_firmware(adev->gfx.ce_fw); 1406 adev->gfx.ce_fw = NULL; 1407 } 1408 return err; 1409 } 1410 1411 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, 1412 const char *chip_name) 1413 { 1414 char fw_name[30]; 1415 int err; 1416 struct amdgpu_firmware_info *info = NULL; 1417 const struct common_firmware_header *header = NULL; 1418 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1419 unsigned int *tmp = NULL; 1420 unsigned int i = 0; 1421 uint16_t version_major; 1422 uint16_t 
version_minor; 1423 uint32_t smu_version; 1424 1425 /* 1426 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin 1427 * instead of picasso_rlc.bin. 1428 * Judgment method: 1429 * PCO AM4: revision >= 0xC8 && revision <= 0xCF 1430 * or revision >= 0xD8 && revision <= 0xDF 1431 * otherwise is PCO FP5 1432 */ 1433 if (!strcmp(chip_name, "picasso") && 1434 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || 1435 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) 1436 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); 1437 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && 1438 (smu_version >= 0x41e2b)) 1439 /** 1440 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 1441 */ 1442 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); 1443 else 1444 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 1445 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 1446 if (err) 1447 goto out; 1448 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 1449 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1450 1451 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1452 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1453 if (version_major == 2 && version_minor == 1) 1454 adev->gfx.rlc.is_rlc_v2_1 = true; 1455 1456 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 1457 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 1458 adev->gfx.rlc.save_and_restore_offset = 1459 le32_to_cpu(rlc_hdr->save_and_restore_offset); 1460 adev->gfx.rlc.clear_state_descriptor_offset = 1461 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 1462 adev->gfx.rlc.avail_scratch_ram_locations = 1463 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 1464 adev->gfx.rlc.reg_restore_list_size = 1465 le32_to_cpu(rlc_hdr->reg_restore_list_size); 1466 adev->gfx.rlc.reg_list_format_start = 1467 le32_to_cpu(rlc_hdr->reg_list_format_start); 1468 adev->gfx.rlc.reg_list_format_separate_start = 1469 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 1470 adev->gfx.rlc.starting_offsets_start = 1471 le32_to_cpu(rlc_hdr->starting_offsets_start); 1472 adev->gfx.rlc.reg_list_format_size_bytes = 1473 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 1474 adev->gfx.rlc.reg_list_size_bytes = 1475 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 1476 adev->gfx.rlc.register_list_format = 1477 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 1478 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 1479 if (!adev->gfx.rlc.register_list_format) { 1480 err = -ENOMEM; 1481 goto out; 1482 } 1483 1484 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1485 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 1486 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) 1487 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 1488 1489 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 1490 1491 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1492 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 1493 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) 1494 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 1495 1496 if (adev->gfx.rlc.is_rlc_v2_1) 1497 gfx_v9_0_init_rlc_ext_microcode(adev); 1498 1499 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1500 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1501 
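/* Describe the RLC_G image for PSP front-door loading: the driver only fills in these ucode entries and accounts for the page-aligned size; the PSP firmware performs the actual load. (descriptive comment added for clarity) */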
info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1502 info->fw = adev->gfx.rlc_fw; 1503 header = (const struct common_firmware_header *)info->fw->data; 1504 adev->firmware.fw_size += 1505 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1506 1507 if (adev->gfx.rlc.is_rlc_v2_1 && 1508 adev->gfx.rlc.save_restore_list_cntl_size_bytes && 1509 adev->gfx.rlc.save_restore_list_gpm_size_bytes && 1510 adev->gfx.rlc.save_restore_list_srm_size_bytes) { 1511 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; 1512 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; 1513 info->fw = adev->gfx.rlc_fw; 1514 adev->firmware.fw_size += 1515 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); 1516 1517 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; 1518 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; 1519 info->fw = adev->gfx.rlc_fw; 1520 adev->firmware.fw_size += 1521 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); 1522 1523 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; 1524 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; 1525 info->fw = adev->gfx.rlc_fw; 1526 adev->firmware.fw_size += 1527 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); 1528 } 1529 } 1530 1531 out: 1532 if (err) { 1533 dev_err(adev->dev, 1534 "gfx9: Failed to load firmware \"%s\"\n", 1535 fw_name); 1536 release_firmware(adev->gfx.rlc_fw); 1537 adev->gfx.rlc_fw = NULL; 1538 } 1539 return err; 1540 } 1541 1542 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) 1543 { 1544 if (adev->asic_type == CHIP_ALDEBARAN || 1545 adev->asic_type == CHIP_ARCTURUS || 1546 adev->asic_type == CHIP_RENOIR) 1547 return false; 1548 1549 return true; 1550 } 1551 1552 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, 1553 const char *chip_name) 1554 { 1555 char fw_name[30]; 1556 int err; 1557 struct amdgpu_firmware_info *info = NULL; 1558 const struct common_firmware_header *header = NULL; 1559 const struct gfx_firmware_header_v1_0 *cp_hdr; 1560 1561 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1562 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1563 if (err) 1564 goto out; 1565 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 1566 if (err) 1567 goto out; 1568 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1569 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1570 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1571 1572 1573 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1574 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1575 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1576 if (!err) { 1577 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1578 if (err) 1579 goto out; 1580 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1581 adev->gfx.mec2_fw->data; 1582 adev->gfx.mec2_fw_version = 1583 le32_to_cpu(cp_hdr->header.ucode_version); 1584 adev->gfx.mec2_feature_version = 1585 le32_to_cpu(cp_hdr->ucode_feature_version); 1586 } else { 1587 err = 0; 1588 adev->gfx.mec2_fw = NULL; 1589 } 1590 } 1591 1592 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1593 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1594 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1595 info->fw = adev->gfx.mec_fw; 1596 header = (const struct common_firmware_header *)info->fw->data; 1597 cp_hdr = (const struct gfx_firmware_header_v1_0 
*)info->fw->data; 1598 adev->firmware.fw_size += 1599 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1600 1601 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; 1602 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; 1603 info->fw = adev->gfx.mec_fw; 1604 adev->firmware.fw_size += 1605 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1606 1607 if (adev->gfx.mec2_fw) { 1608 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1609 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1610 info->fw = adev->gfx.mec2_fw; 1611 header = (const struct common_firmware_header *)info->fw->data; 1612 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1613 adev->firmware.fw_size += 1614 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1615 1616 /* TODO: Determine if MEC2 JT FW loading can be removed 1617 for all GFX V9 asic and above */ 1618 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1619 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; 1620 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; 1621 info->fw = adev->gfx.mec2_fw; 1622 adev->firmware.fw_size += 1623 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, 1624 PAGE_SIZE); 1625 } 1626 } 1627 } 1628 1629 out: 1630 gfx_v9_0_check_if_need_gfxoff(adev); 1631 gfx_v9_0_check_fw_write_wait(adev); 1632 if (err) { 1633 dev_err(adev->dev, 1634 "gfx9: Failed to load firmware \"%s\"\n", 1635 fw_name); 1636 release_firmware(adev->gfx.mec_fw); 1637 adev->gfx.mec_fw = NULL; 1638 release_firmware(adev->gfx.mec2_fw); 1639 adev->gfx.mec2_fw = NULL; 1640 } 1641 return err; 1642 } 1643 1644 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1645 { 1646 const char *chip_name; 1647 int r; 1648 1649 DRM_DEBUG("\n"); 1650 1651 switch (adev->asic_type) { 1652 case CHIP_VEGA10: 1653 chip_name = "vega10"; 1654 break; 1655 case CHIP_VEGA12: 1656 chip_name = "vega12"; 1657 break; 1658 case CHIP_VEGA20: 1659 chip_name = "vega20"; 1660 break; 1661 case CHIP_RAVEN: 1662 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 1663 chip_name = "raven2"; 1664 else if (adev->apu_flags & AMD_APU_IS_PICASSO) 1665 chip_name = "picasso"; 1666 else 1667 chip_name = "raven"; 1668 break; 1669 case CHIP_ARCTURUS: 1670 chip_name = "arcturus"; 1671 break; 1672 case CHIP_RENOIR: 1673 if (adev->apu_flags & AMD_APU_IS_RENOIR) 1674 chip_name = "renoir"; 1675 else 1676 chip_name = "green_sardine"; 1677 break; 1678 case CHIP_ALDEBARAN: 1679 chip_name = "aldebaran"; 1680 break; 1681 default: 1682 BUG(); 1683 } 1684 1685 /* No CPG in Arcturus */ 1686 if (adev->gfx.num_gfx_rings) { 1687 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); 1688 if (r) 1689 return r; 1690 } 1691 1692 r = gfx_v9_0_init_rlc_microcode(adev, chip_name); 1693 if (r) 1694 return r; 1695 1696 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); 1697 if (r) 1698 return r; 1699 1700 return r; 1701 } 1702 1703 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1704 { 1705 u32 count = 0; 1706 const struct cs_section_def *sect = NULL; 1707 const struct cs_extent_def *ext = NULL; 1708 1709 /* begin clear state */ 1710 count += 2; 1711 /* context control state */ 1712 count += 3; 1713 1714 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1715 for (ext = sect->section; ext->extent != NULL; ++ext) { 1716 if (sect->id == SECT_CONTEXT) 1717 count += 2 + ext->reg_count; 1718 else 1719 return 0; 1720 } 1721 } 1722 1723 /* end clear state */ 1724 count += 2; 1725 /* clear state */ 1726 count += 2; 1727 1728 return count; 1729 
} 1730 1731 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1732 volatile u32 *buffer) 1733 { 1734 u32 count = 0, i; 1735 const struct cs_section_def *sect = NULL; 1736 const struct cs_extent_def *ext = NULL; 1737 1738 if (adev->gfx.rlc.cs_data == NULL) 1739 return; 1740 if (buffer == NULL) 1741 return; 1742 1743 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1744 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1745 1746 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1747 buffer[count++] = cpu_to_le32(0x80000000); 1748 buffer[count++] = cpu_to_le32(0x80000000); 1749 1750 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1751 for (ext = sect->section; ext->extent != NULL; ++ext) { 1752 if (sect->id == SECT_CONTEXT) { 1753 buffer[count++] = 1754 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1755 buffer[count++] = cpu_to_le32(ext->reg_index - 1756 PACKET3_SET_CONTEXT_REG_START); 1757 for (i = 0; i < ext->reg_count; i++) 1758 buffer[count++] = cpu_to_le32(ext->extent[i]); 1759 } else { 1760 return; 1761 } 1762 } 1763 } 1764 1765 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1766 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1767 1768 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1769 buffer[count++] = cpu_to_le32(0); 1770 } 1771 1772 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1773 { 1774 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1775 uint32_t pg_always_on_cu_num = 2; 1776 uint32_t always_on_cu_num; 1777 uint32_t i, j, k; 1778 uint32_t mask, cu_bitmap, counter; 1779 1780 if (adev->flags & AMD_IS_APU) 1781 always_on_cu_num = 4; 1782 else if (adev->asic_type == CHIP_VEGA12) 1783 always_on_cu_num = 8; 1784 else 1785 always_on_cu_num = 12; 1786 1787 mutex_lock(&adev->grbm_idx_mutex); 1788 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1789 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1790 mask = 1; 1791 cu_bitmap = 0; 1792 counter = 0; 1793 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1794 1795 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1796 if (cu_info->bitmap[i][j] & mask) { 1797 if (counter == pg_always_on_cu_num) 1798 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1799 if (counter < always_on_cu_num) 1800 cu_bitmap |= mask; 1801 else 1802 break; 1803 counter++; 1804 } 1805 mask <<= 1; 1806 } 1807 1808 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1809 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1810 } 1811 } 1812 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1813 mutex_unlock(&adev->grbm_idx_mutex); 1814 } 1815 1816 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1817 { 1818 uint32_t data; 1819 1820 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1821 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1822 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1823 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1824 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1825 1826 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1827 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1828 1829 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1830 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1831 1832 mutex_lock(&adev->grbm_idx_mutex); 1833 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1834 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 
0xffffffff); 1835 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1836 1837 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1838 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1839 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1840 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1841 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1842 1843 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1844 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1845 data &= 0x0000FFFF; 1846 data |= 0x00C00000; 1847 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1848 1849 /* 1850 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1851 * programmed in gfx_v9_0_init_always_on_cu_mask() 1852 */ 1853 1854 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1855 * but used for RLC_LB_CNTL configuration */ 1856 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1857 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1858 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1859 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1860 mutex_unlock(&adev->grbm_idx_mutex); 1861 1862 gfx_v9_0_init_always_on_cu_mask(adev); 1863 } 1864 1865 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1866 { 1867 uint32_t data; 1868 1869 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1870 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1871 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1872 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1873 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1874 1875 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1876 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1877 1878 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1879 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1880 1881 mutex_lock(&adev->grbm_idx_mutex); 1882 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1883 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1884 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1885 1886 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1887 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1888 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1889 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1890 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1891 1892 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1893 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1894 data &= 0x0000FFFF; 1895 data |= 0x00C00000; 1896 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1897 1898 /* 1899 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1900 * programmed in gfx_v9_0_init_always_on_cu_mask() 1901 */ 1902 1903 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1904 * but used for RLC_LB_CNTL configuration */ 1905 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1906 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1907 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1908 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1909 mutex_unlock(&adev->grbm_idx_mutex); 1910 1911 gfx_v9_0_init_always_on_cu_mask(adev); 1912 } 1913 1914 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1915 { 1916 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 
1 : 0); 1917 } 1918 1919 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1920 { 1921 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) 1922 return 5; 1923 else 1924 return 4; 1925 } 1926 1927 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1928 { 1929 const struct cs_section_def *cs_data; 1930 int r; 1931 1932 adev->gfx.rlc.cs_data = gfx9_cs_data; 1933 1934 cs_data = adev->gfx.rlc.cs_data; 1935 1936 if (cs_data) { 1937 /* init clear state block */ 1938 r = amdgpu_gfx_rlc_init_csb(adev); 1939 if (r) 1940 return r; 1941 } 1942 1943 if (adev->flags & AMD_IS_APU) { 1944 /* TODO: double check the cp_table_size for RV */ 1945 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1946 r = amdgpu_gfx_rlc_init_cpt(adev); 1947 if (r) 1948 return r; 1949 } 1950 1951 switch (adev->asic_type) { 1952 case CHIP_RAVEN: 1953 gfx_v9_0_init_lbpw(adev); 1954 break; 1955 case CHIP_VEGA20: 1956 gfx_v9_4_init_lbpw(adev); 1957 break; 1958 default: 1959 break; 1960 } 1961 1962 /* init spm vmid with 0xf */ 1963 if (adev->gfx.rlc.funcs->update_spm_vmid) 1964 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); 1965 1966 return 0; 1967 } 1968 1969 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1970 { 1971 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1972 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1973 } 1974 1975 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1976 { 1977 int r; 1978 u32 *hpd; 1979 const __le32 *fw_data; 1980 unsigned fw_size; 1981 u32 *fw; 1982 size_t mec_hpd_size; 1983 1984 const struct gfx_firmware_header_v1_0 *mec_hdr; 1985 1986 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1987 1988 /* take ownership of the relevant compute queues */ 1989 amdgpu_gfx_compute_queue_acquire(adev); 1990 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1991 if (mec_hpd_size) { 1992 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1993 AMDGPU_GEM_DOMAIN_VRAM, 1994 &adev->gfx.mec.hpd_eop_obj, 1995 &adev->gfx.mec.hpd_eop_gpu_addr, 1996 (void **)&hpd); 1997 if (r) { 1998 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1999 gfx_v9_0_mec_fini(adev); 2000 return r; 2001 } 2002 2003 memset(hpd, 0, mec_hpd_size); 2004 2005 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 2006 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 2007 } 2008 2009 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 2010 2011 fw_data = (const __le32 *) 2012 (adev->gfx.mec_fw->data + 2013 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 2014 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 2015 2016 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 2017 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2018 &adev->gfx.mec.mec_fw_obj, 2019 &adev->gfx.mec.mec_fw_gpu_addr, 2020 (void **)&fw); 2021 if (r) { 2022 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 2023 gfx_v9_0_mec_fini(adev); 2024 return r; 2025 } 2026 2027 memcpy(fw, fw_data, fw_size); 2028 2029 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 2030 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 2031 2032 return 0; 2033 } 2034 2035 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 2036 { 2037 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 2038 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 2039 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 2040 (address << SQ_IND_INDEX__INDEX__SHIFT) | 2041 (SQ_IND_INDEX__FORCE_READ_MASK)); 2042 return 
RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 2043 } 2044 2045 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 2046 uint32_t wave, uint32_t thread, 2047 uint32_t regno, uint32_t num, uint32_t *out) 2048 { 2049 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 2050 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 2051 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 2052 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 2053 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 2054 (SQ_IND_INDEX__FORCE_READ_MASK) | 2055 (SQ_IND_INDEX__AUTO_INCR_MASK)); 2056 while (num--) 2057 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 2058 } 2059 2060 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 2061 { 2062 /* type 1 wave data */ 2063 dst[(*no_fields)++] = 1; 2064 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 2065 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 2066 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 2067 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 2068 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 2069 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 2070 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 2071 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 2072 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 2073 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 2074 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 2075 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 2076 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 2077 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 2078 } 2079 2080 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 2081 uint32_t wave, uint32_t start, 2082 uint32_t size, uint32_t *dst) 2083 { 2084 wave_read_regs( 2085 adev, simd, wave, 0, 2086 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 2087 } 2088 2089 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 2090 uint32_t wave, uint32_t thread, 2091 uint32_t start, uint32_t size, 2092 uint32_t *dst) 2093 { 2094 wave_read_regs( 2095 adev, simd, wave, thread, 2096 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 2097 } 2098 2099 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 2100 u32 me, u32 pipe, u32 q, u32 vm) 2101 { 2102 soc15_grbm_select(adev, me, pipe, q, vm); 2103 } 2104 2105 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 2106 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 2107 .select_se_sh = &gfx_v9_0_select_se_sh, 2108 .read_wave_data = &gfx_v9_0_read_wave_data, 2109 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 2110 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 2111 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 2112 }; 2113 2114 static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = { 2115 .ras_late_init = amdgpu_gfx_ras_late_init, 2116 .ras_fini = amdgpu_gfx_ras_fini, 2117 .ras_error_inject = &gfx_v9_0_ras_error_inject, 2118 .query_ras_error_count = &gfx_v9_0_query_ras_error_count, 2119 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, 2120 }; 2121 2122 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 2123 { 2124 u32 gb_addr_config; 2125 int err; 2126 2127 adev->gfx.funcs = 
&gfx_v9_0_gfx_funcs; 2128 2129 switch (adev->asic_type) { 2130 case CHIP_VEGA10: 2131 adev->gfx.config.max_hw_contexts = 8; 2132 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2133 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2134 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2135 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2136 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 2137 break; 2138 case CHIP_VEGA12: 2139 adev->gfx.config.max_hw_contexts = 8; 2140 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2141 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2142 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2143 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2144 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 2145 DRM_INFO("fix gfx.config for vega12\n"); 2146 break; 2147 case CHIP_VEGA20: 2148 adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs; 2149 adev->gfx.config.max_hw_contexts = 8; 2150 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2151 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2152 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2153 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2154 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2155 gb_addr_config &= ~0xf3e777ff; 2156 gb_addr_config |= 0x22014042; 2157 /* check vbios table if gpu info is not available */ 2158 err = amdgpu_atomfirmware_get_gfx_info(adev); 2159 if (err) 2160 return err; 2161 break; 2162 case CHIP_RAVEN: 2163 adev->gfx.config.max_hw_contexts = 8; 2164 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2165 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2166 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2167 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2168 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 2169 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 2170 else 2171 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 2172 break; 2173 case CHIP_ARCTURUS: 2174 adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs; 2175 adev->gfx.config.max_hw_contexts = 8; 2176 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2177 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2178 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2179 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2180 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2181 gb_addr_config &= ~0xf3e777ff; 2182 gb_addr_config |= 0x22014042; 2183 break; 2184 case CHIP_RENOIR: 2185 adev->gfx.config.max_hw_contexts = 8; 2186 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2187 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2188 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 2189 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2190 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2191 gb_addr_config &= ~0xf3e777ff; 2192 gb_addr_config |= 0x22010042; 2193 break; 2194 case CHIP_ALDEBARAN: 2195 adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs; 2196 adev->gfx.config.max_hw_contexts = 8; 2197 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2198 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2199 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2200 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2201 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2202 gb_addr_config &= ~0xf3e777ff; 2203 gb_addr_config |= 0x22014042; 2204 /* check vbios table if gpu info is not available */ 2205 err = amdgpu_atomfirmware_get_gfx_info(adev); 2206 if (err) 2207 return err; 2208 break; 2209 default: 2210 BUG(); 2211 break; 2212 } 2213 2214 adev->gfx.config.gb_addr_config = gb_addr_config; 2215 2216 
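/* GB_ADDR_CONFIG packs its counts as log2-encoded fields, so each one is expanded below with a shift of the value read by REG_GET_FIELD(). (descriptive comment added for clarity) */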
adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 2217 REG_GET_FIELD( 2218 adev->gfx.config.gb_addr_config, 2219 GB_ADDR_CONFIG, 2220 NUM_PIPES); 2221 2222 adev->gfx.config.max_tile_pipes = 2223 adev->gfx.config.gb_addr_config_fields.num_pipes; 2224 2225 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 2226 REG_GET_FIELD( 2227 adev->gfx.config.gb_addr_config, 2228 GB_ADDR_CONFIG, 2229 NUM_BANKS); 2230 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 2231 REG_GET_FIELD( 2232 adev->gfx.config.gb_addr_config, 2233 GB_ADDR_CONFIG, 2234 MAX_COMPRESSED_FRAGS); 2235 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 2236 REG_GET_FIELD( 2237 adev->gfx.config.gb_addr_config, 2238 GB_ADDR_CONFIG, 2239 NUM_RB_PER_SE); 2240 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 2241 REG_GET_FIELD( 2242 adev->gfx.config.gb_addr_config, 2243 GB_ADDR_CONFIG, 2244 NUM_SHADER_ENGINES); 2245 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 2246 REG_GET_FIELD( 2247 adev->gfx.config.gb_addr_config, 2248 GB_ADDR_CONFIG, 2249 PIPE_INTERLEAVE_SIZE)); 2250 2251 return 0; 2252 } 2253 2254 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2255 int mec, int pipe, int queue) 2256 { 2257 unsigned irq_type; 2258 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2259 unsigned int hw_prio; 2260 2261 ring = &adev->gfx.compute_ring[ring_id]; 2262 2263 /* mec0 is me1 */ 2264 ring->me = mec + 1; 2265 ring->pipe = pipe; 2266 ring->queue = queue; 2267 2268 ring->ring_obj = NULL; 2269 ring->use_doorbell = true; 2270 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2271 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2272 + (ring_id * GFX9_MEC_HPD_SIZE); 2273 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2274 2275 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2276 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2277 + ring->pipe; 2278 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
2279 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 2280 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2281 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 2282 hw_prio, NULL); 2283 } 2284 2285 static int gfx_v9_0_sw_init(void *handle) 2286 { 2287 int i, j, k, r, ring_id; 2288 struct amdgpu_ring *ring; 2289 struct amdgpu_kiq *kiq; 2290 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2291 2292 switch (adev->asic_type) { 2293 case CHIP_VEGA10: 2294 case CHIP_VEGA12: 2295 case CHIP_VEGA20: 2296 case CHIP_RAVEN: 2297 case CHIP_ARCTURUS: 2298 case CHIP_RENOIR: 2299 case CHIP_ALDEBARAN: 2300 adev->gfx.mec.num_mec = 2; 2301 break; 2302 default: 2303 adev->gfx.mec.num_mec = 1; 2304 break; 2305 } 2306 2307 adev->gfx.mec.num_pipe_per_mec = 4; 2308 adev->gfx.mec.num_queue_per_pipe = 8; 2309 2310 /* EOP Event */ 2311 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2312 if (r) 2313 return r; 2314 2315 /* Privileged reg */ 2316 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2317 &adev->gfx.priv_reg_irq); 2318 if (r) 2319 return r; 2320 2321 /* Privileged inst */ 2322 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2323 &adev->gfx.priv_inst_irq); 2324 if (r) 2325 return r; 2326 2327 /* ECC error */ 2328 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2329 &adev->gfx.cp_ecc_error_irq); 2330 if (r) 2331 return r; 2332 2333 /* FUE error */ 2334 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2335 &adev->gfx.cp_ecc_error_irq); 2336 if (r) 2337 return r; 2338 2339 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2340 2341 gfx_v9_0_scratch_init(adev); 2342 2343 r = gfx_v9_0_init_microcode(adev); 2344 if (r) { 2345 DRM_ERROR("Failed to load gfx firmware!\n"); 2346 return r; 2347 } 2348 2349 r = adev->gfx.rlc.funcs->init(adev); 2350 if (r) { 2351 DRM_ERROR("Failed to init rlc BOs!\n"); 2352 return r; 2353 } 2354 2355 r = gfx_v9_0_mec_init(adev); 2356 if (r) { 2357 DRM_ERROR("Failed to init MEC BOs!\n"); 2358 return r; 2359 } 2360 2361 /* set up the gfx ring */ 2362 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2363 ring = &adev->gfx.gfx_ring[i]; 2364 ring->ring_obj = NULL; 2365 if (!i) 2366 sprintf(ring->name, "gfx"); 2367 else 2368 sprintf(ring->name, "gfx_%d", i); 2369 ring->use_doorbell = true; 2370 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2371 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2372 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, 2373 AMDGPU_RING_PRIO_DEFAULT, NULL); 2374 if (r) 2375 return r; 2376 } 2377 2378 /* set up the compute queues - allocate horizontally across pipes */ 2379 ring_id = 0; 2380 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2381 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2382 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2383 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2384 continue; 2385 2386 r = gfx_v9_0_compute_ring_init(adev, 2387 ring_id, 2388 i, k, j); 2389 if (r) 2390 return r; 2391 2392 ring_id++; 2393 } 2394 } 2395 } 2396 2397 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 2398 if (r) { 2399 DRM_ERROR("Failed to init KIQ BOs!\n"); 2400 return r; 2401 } 2402 2403 kiq = &adev->gfx.kiq; 2404 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2405 if (r) 2406 return r; 2407 2408 /* create MQD for all compute queues as wel 
as KIQ for SRIOV case */ 2409 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 2410 if (r) 2411 return r; 2412 2413 adev->gfx.ce_ram_size = 0x8000; 2414 2415 r = gfx_v9_0_gpu_early_init(adev); 2416 if (r) 2417 return r; 2418 2419 return 0; 2420 } 2421 2422 2423 static int gfx_v9_0_sw_fini(void *handle) 2424 { 2425 int i; 2426 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2427 2428 if (adev->gfx.ras_funcs && 2429 adev->gfx.ras_funcs->ras_fini) 2430 adev->gfx.ras_funcs->ras_fini(adev); 2431 2432 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2433 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2434 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2435 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2436 2437 amdgpu_gfx_mqd_sw_fini(adev); 2438 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); 2439 amdgpu_gfx_kiq_fini(adev); 2440 2441 gfx_v9_0_mec_fini(adev); 2442 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 2443 if (adev->flags & AMD_IS_APU) { 2444 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2445 &adev->gfx.rlc.cp_table_gpu_addr, 2446 (void **)&adev->gfx.rlc.cp_table_ptr); 2447 } 2448 gfx_v9_0_free_microcode(adev); 2449 2450 return 0; 2451 } 2452 2453 2454 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2455 { 2456 /* TODO */ 2457 } 2458 2459 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, 2460 u32 instance) 2461 { 2462 u32 data; 2463 2464 if (instance == 0xffffffff) 2465 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2466 else 2467 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2468 2469 if (se_num == 0xffffffff) 2470 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2471 else 2472 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2473 2474 if (sh_num == 0xffffffff) 2475 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2476 else 2477 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2478 2479 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2480 } 2481 2482 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2483 { 2484 u32 data, mask; 2485 2486 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2487 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2488 2489 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2490 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2491 2492 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2493 adev->gfx.config.max_sh_per_se); 2494 2495 return (~data) & mask; 2496 } 2497 2498 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2499 { 2500 int i, j; 2501 u32 data; 2502 u32 active_rbs = 0; 2503 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2504 adev->gfx.config.max_sh_per_se; 2505 2506 mutex_lock(&adev->grbm_idx_mutex); 2507 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2508 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2509 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2510 data = gfx_v9_0_get_rb_active_bitmap(adev); 2511 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 2512 rb_bitmap_width_per_sh); 2513 } 2514 } 2515 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2516 mutex_unlock(&adev->grbm_idx_mutex); 2517 2518 adev->gfx.config.backend_enable_mask = active_rbs; 2519 adev->gfx.config.num_rbs = hweight32(active_rbs); 2520 } 2521 2522 #define DEFAULT_SH_MEM_BASES (0x6000) 2523 static void 
gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 2524 { 2525 int i; 2526 uint32_t sh_mem_config; 2527 uint32_t sh_mem_bases; 2528 2529 /* 2530 * Configure apertures: 2531 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2532 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2533 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2534 */ 2535 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 2536 2537 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 2538 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 2539 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 2540 2541 mutex_lock(&adev->srbm_mutex); 2542 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2543 soc15_grbm_select(adev, 0, 0, 0, i); 2544 /* CP and shaders */ 2545 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 2546 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 2547 } 2548 soc15_grbm_select(adev, 0, 0, 0, 0); 2549 mutex_unlock(&adev->srbm_mutex); 2550 2551 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 2552 access. These should be enabled by FW for target VMIDs. */ 2553 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2554 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 2555 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 2556 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 2557 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 2558 } 2559 } 2560 2561 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) 2562 { 2563 int vmid; 2564 2565 /* 2566 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2567 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2568 * the driver can enable them for graphics. VMID0 should maintain 2569 * access so that HWS firmware can save/restore entries.
2570 */ 2571 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { 2572 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2573 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2574 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2575 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2576 } 2577 } 2578 2579 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) 2580 { 2581 uint32_t tmp; 2582 2583 switch (adev->asic_type) { 2584 case CHIP_ARCTURUS: 2585 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); 2586 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, 2587 DISABLE_BARRIER_WAITCNT, 1); 2588 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); 2589 break; 2590 default: 2591 break; 2592 } 2593 } 2594 2595 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2596 { 2597 u32 tmp; 2598 int i; 2599 2600 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2601 2602 gfx_v9_0_tiling_mode_table_init(adev); 2603 2604 gfx_v9_0_setup_rb(adev); 2605 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2606 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2607 2608 /* XXX SH_MEM regs */ 2609 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2610 mutex_lock(&adev->srbm_mutex); 2611 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 2612 soc15_grbm_select(adev, 0, 0, 0, i); 2613 /* CP and shaders */ 2614 if (i == 0) { 2615 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2616 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2617 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2618 !!adev->gmc.noretry); 2619 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2620 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2621 } else { 2622 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2623 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2624 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2625 !!adev->gmc.noretry); 2626 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2627 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2628 (adev->gmc.private_aperture_start >> 48)); 2629 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2630 (adev->gmc.shared_aperture_start >> 48)); 2631 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2632 } 2633 } 2634 soc15_grbm_select(adev, 0, 0, 0, 0); 2635 2636 mutex_unlock(&adev->srbm_mutex); 2637 2638 gfx_v9_0_init_compute_vmid(adev); 2639 gfx_v9_0_init_gds_vmid(adev); 2640 gfx_v9_0_init_sq_config(adev); 2641 } 2642 2643 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2644 { 2645 u32 i, j, k; 2646 u32 mask; 2647 2648 mutex_lock(&adev->grbm_idx_mutex); 2649 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2650 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2651 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2652 for (k = 0; k < adev->usec_timeout; k++) { 2653 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2654 break; 2655 udelay(1); 2656 } 2657 if (k == adev->usec_timeout) { 2658 gfx_v9_0_select_se_sh(adev, 0xffffffff, 2659 0xffffffff, 0xffffffff); 2660 mutex_unlock(&adev->grbm_idx_mutex); 2661 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2662 i, j); 2663 return; 2664 } 2665 } 2666 } 2667 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2668 mutex_unlock(&adev->grbm_idx_mutex); 2669 2670 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2671 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2672 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2673 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2674 for (k = 0; k < adev->usec_timeout; k++) { 2675 if 
((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2676 break; 2677 udelay(1); 2678 } 2679 } 2680 2681 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2682 bool enable) 2683 { 2684 u32 tmp; 2685 2686 /* These interrupts should be enabled to drive DS clock */ 2687 2688 tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2689 2690 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2691 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2692 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2693 if(adev->gfx.num_gfx_rings) 2694 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2695 2696 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2697 } 2698 2699 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2700 { 2701 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2702 /* csib */ 2703 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2704 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2705 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2706 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2707 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2708 adev->gfx.rlc.clear_state_size); 2709 } 2710 2711 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2712 int indirect_offset, 2713 int list_size, 2714 int *unique_indirect_regs, 2715 int unique_indirect_reg_count, 2716 int *indirect_start_offsets, 2717 int *indirect_start_offsets_count, 2718 int max_start_offsets_count) 2719 { 2720 int idx; 2721 2722 for (; indirect_offset < list_size; indirect_offset++) { 2723 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2724 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2725 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2726 2727 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2728 indirect_offset += 2; 2729 2730 /* look for the matching indice */ 2731 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2732 if (unique_indirect_regs[idx] == 2733 register_list_format[indirect_offset] || 2734 !unique_indirect_regs[idx]) 2735 break; 2736 } 2737 2738 BUG_ON(idx >= unique_indirect_reg_count); 2739 2740 if (!unique_indirect_regs[idx]) 2741 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2742 2743 indirect_offset++; 2744 } 2745 } 2746 } 2747 2748 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2749 { 2750 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2751 int unique_indirect_reg_count = 0; 2752 2753 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2754 int indirect_start_offsets_count = 0; 2755 2756 int list_size = 0; 2757 int i = 0, j = 0; 2758 u32 tmp = 0; 2759 2760 u32 *register_list_format = 2761 kmemdup(adev->gfx.rlc.register_list_format, 2762 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2763 if (!register_list_format) 2764 return -ENOMEM; 2765 2766 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2767 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2768 gfx_v9_1_parse_ind_reg_list(register_list_format, 2769 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2770 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2771 unique_indirect_regs, 2772 unique_indirect_reg_count, 2773 indirect_start_offsets, 2774 &indirect_start_offsets_count, 2775 
ARRAY_SIZE(indirect_start_offsets)); 2776 2777 /* enable auto inc in case it is disabled */ 2778 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2779 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2780 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2781 2782 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2783 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2784 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2785 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2786 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2787 adev->gfx.rlc.register_restore[i]); 2788 2789 /* load indirect register */ 2790 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2791 adev->gfx.rlc.reg_list_format_start); 2792 2793 /* direct register portion */ 2794 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2795 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2796 register_list_format[i]); 2797 2798 /* indirect register portion */ 2799 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2800 if (register_list_format[i] == 0xFFFFFFFF) { 2801 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2802 continue; 2803 } 2804 2805 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2806 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2807 2808 for (j = 0; j < unique_indirect_reg_count; j++) { 2809 if (register_list_format[i] == unique_indirect_regs[j]) { 2810 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2811 break; 2812 } 2813 } 2814 2815 BUG_ON(j >= unique_indirect_reg_count); 2816 2817 i++; 2818 } 2819 2820 /* set save/restore list size */ 2821 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2822 list_size = list_size >> 1; 2823 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2824 adev->gfx.rlc.reg_restore_list_size); 2825 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2826 2827 /* write the starting offsets to RLC scratch ram */ 2828 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2829 adev->gfx.rlc.starting_offsets_start); 2830 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2831 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2832 indirect_start_offsets[i]); 2833 2834 /* load unique indirect regs*/ 2835 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2836 if (unique_indirect_regs[i] != 0) { 2837 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2838 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2839 unique_indirect_regs[i] & 0x3FFFF); 2840 2841 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2842 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2843 unique_indirect_regs[i] >> 20); 2844 } 2845 } 2846 2847 kfree(register_list_format); 2848 return 0; 2849 } 2850 2851 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2852 { 2853 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2854 } 2855 2856 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2857 bool enable) 2858 { 2859 uint32_t data = 0; 2860 uint32_t default_data = 0; 2861 2862 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2863 if (enable) { 2864 /* enable GFXIP control over CGPG */ 2865 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2866 if(default_data != data) 2867 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2868 2869 /* update status */ 2870 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2871 data |= 
(2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2872 if(default_data != data) 2873 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2874 } else { 2875 /* restore GFXIP control over GCPG */ 2876 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2877 if(default_data != data) 2878 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2879 } 2880 } 2881 2882 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2883 { 2884 uint32_t data = 0; 2885 2886 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2887 AMD_PG_SUPPORT_GFX_SMG | 2888 AMD_PG_SUPPORT_GFX_DMG)) { 2889 /* init IDLE_POLL_COUNT = 60 */ 2890 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2891 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2892 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2893 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2894 2895 /* init RLC PG Delay */ 2896 data = 0; 2897 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2898 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2899 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2900 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2901 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2902 2903 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2904 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2905 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2906 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2907 2908 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2909 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2910 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2911 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2912 2913 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2914 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2915 2916 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2917 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2918 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2919 if (adev->asic_type != CHIP_RENOIR) 2920 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2921 } 2922 } 2923 2924 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2925 bool enable) 2926 { 2927 uint32_t data = 0; 2928 uint32_t default_data = 0; 2929 2930 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2931 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2932 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2933 enable ? 1 : 0); 2934 if (default_data != data) 2935 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2936 } 2937 2938 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2939 bool enable) 2940 { 2941 uint32_t data = 0; 2942 uint32_t default_data = 0; 2943 2944 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2945 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2946 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2947 enable ? 1 : 0); 2948 if(default_data != data) 2949 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2950 } 2951 2952 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2953 bool enable) 2954 { 2955 uint32_t data = 0; 2956 uint32_t default_data = 0; 2957 2958 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2959 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2960 CP_PG_DISABLE, 2961 enable ? 
0 : 1); 2962 if(default_data != data) 2963 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2964 } 2965 2966 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2967 bool enable) 2968 { 2969 uint32_t data, default_data; 2970 2971 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2972 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2973 GFX_POWER_GATING_ENABLE, 2974 enable ? 1 : 0); 2975 if(default_data != data) 2976 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2977 } 2978 2979 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2980 bool enable) 2981 { 2982 uint32_t data, default_data; 2983 2984 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2985 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2986 GFX_PIPELINE_PG_ENABLE, 2987 enable ? 1 : 0); 2988 if(default_data != data) 2989 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2990 2991 if (!enable) 2992 /* read any GFX register to wake up GFX */ 2993 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2994 } 2995 2996 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2997 bool enable) 2998 { 2999 uint32_t data, default_data; 3000 3001 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3002 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3003 STATIC_PER_CU_PG_ENABLE, 3004 enable ? 1 : 0); 3005 if(default_data != data) 3006 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3007 } 3008 3009 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 3010 bool enable) 3011 { 3012 uint32_t data, default_data; 3013 3014 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3015 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3016 DYN_PER_CU_PG_ENABLE, 3017 enable ? 1 : 0); 3018 if(default_data != data) 3019 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3020 } 3021 3022 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 3023 { 3024 gfx_v9_0_init_csb(adev); 3025 3026 /* 3027 * Rlc save restore list is workable since v2_1. 3028 * And it's needed by gfxoff feature. 
3029 */ 3030 if (adev->gfx.rlc.is_rlc_v2_1) { 3031 if (adev->asic_type == CHIP_VEGA12 || 3032 (adev->apu_flags & AMD_APU_IS_RAVEN2)) 3033 gfx_v9_1_init_rlc_save_restore_list(adev); 3034 gfx_v9_0_enable_save_restore_machine(adev); 3035 } 3036 3037 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 3038 AMD_PG_SUPPORT_GFX_SMG | 3039 AMD_PG_SUPPORT_GFX_DMG | 3040 AMD_PG_SUPPORT_CP | 3041 AMD_PG_SUPPORT_GDS | 3042 AMD_PG_SUPPORT_RLC_SMU_HS)) { 3043 WREG32(mmRLC_JUMP_TABLE_RESTORE, 3044 adev->gfx.rlc.cp_table_gpu_addr >> 8); 3045 gfx_v9_0_init_gfx_power_gating(adev); 3046 } 3047 } 3048 3049 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 3050 { 3051 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 3052 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3053 gfx_v9_0_wait_for_rlc_serdes(adev); 3054 } 3055 3056 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 3057 { 3058 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3059 udelay(50); 3060 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 3061 udelay(50); 3062 } 3063 3064 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 3065 { 3066 #ifdef AMDGPU_RLC_DEBUG_RETRY 3067 u32 rlc_ucode_ver; 3068 #endif 3069 3070 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 3071 udelay(50); 3072 3073 /* carrizo do enable cp interrupt after cp inited */ 3074 if (!(adev->flags & AMD_IS_APU)) { 3075 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3076 udelay(50); 3077 } 3078 3079 #ifdef AMDGPU_RLC_DEBUG_RETRY 3080 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 3081 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 3082 if(rlc_ucode_ver == 0x108) { 3083 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 3084 rlc_ucode_ver, adev->gfx.rlc_fw_version); 3085 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 3086 * default is 0x9C4 to create a 100us interval */ 3087 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 3088 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 3089 * to disable the page fault retry interrupts, default is 3090 * 0x100 (256) */ 3091 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 3092 } 3093 #endif 3094 } 3095 3096 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 3097 { 3098 const struct rlc_firmware_header_v2_0 *hdr; 3099 const __le32 *fw_data; 3100 unsigned i, fw_size; 3101 3102 if (!adev->gfx.rlc_fw) 3103 return -EINVAL; 3104 3105 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 3106 amdgpu_ucode_print_rlc_hdr(&hdr->header); 3107 3108 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 3109 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 3110 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 3111 3112 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 3113 RLCG_UCODE_LOADING_START_ADDRESS); 3114 for (i = 0; i < fw_size; i++) 3115 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 3116 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 3117 3118 return 0; 3119 } 3120 3121 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 3122 { 3123 int r; 3124 3125 if (amdgpu_sriov_vf(adev)) { 3126 gfx_v9_0_init_csb(adev); 3127 return 0; 3128 } 3129 3130 adev->gfx.rlc.funcs->stop(adev); 3131 3132 /* disable CG */ 3133 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 3134 3135 gfx_v9_0_init_pg(adev); 3136 3137 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3138 /* legacy rlc firmware loading */ 3139 r = gfx_v9_0_rlc_load_microcode(adev); 3140 if (r) 3141 return r; 3142 } 3143 
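	/*
	 * Note: amdgpu_lbpw is the "Load Balancing Per Watt" module parameter
	 * (default -1 = auto). The switch below keeps LBPW enabled on Raven
	 * unless it is explicitly set to 0, while Vega20 only enables it when
	 * the parameter is explicitly positive.
	 */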
3144 switch (adev->asic_type) { 3145 case CHIP_RAVEN: 3146 if (amdgpu_lbpw == 0) 3147 gfx_v9_0_enable_lbpw(adev, false); 3148 else 3149 gfx_v9_0_enable_lbpw(adev, true); 3150 break; 3151 case CHIP_VEGA20: 3152 if (amdgpu_lbpw > 0) 3153 gfx_v9_0_enable_lbpw(adev, true); 3154 else 3155 gfx_v9_0_enable_lbpw(adev, false); 3156 break; 3157 default: 3158 break; 3159 } 3160 3161 adev->gfx.rlc.funcs->start(adev); 3162 3163 return 0; 3164 } 3165 3166 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3167 { 3168 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3169 3170 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3171 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3172 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 3173 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3174 udelay(50); 3175 } 3176 3177 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3178 { 3179 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3180 const struct gfx_firmware_header_v1_0 *ce_hdr; 3181 const struct gfx_firmware_header_v1_0 *me_hdr; 3182 const __le32 *fw_data; 3183 unsigned i, fw_size; 3184 3185 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3186 return -EINVAL; 3187 3188 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3189 adev->gfx.pfp_fw->data; 3190 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3191 adev->gfx.ce_fw->data; 3192 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3193 adev->gfx.me_fw->data; 3194 3195 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3196 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3197 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3198 3199 gfx_v9_0_cp_gfx_enable(adev, false); 3200 3201 /* PFP */ 3202 fw_data = (const __le32 *) 3203 (adev->gfx.pfp_fw->data + 3204 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3205 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3206 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3207 for (i = 0; i < fw_size; i++) 3208 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3209 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3210 3211 /* CE */ 3212 fw_data = (const __le32 *) 3213 (adev->gfx.ce_fw->data + 3214 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3215 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3216 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3217 for (i = 0; i < fw_size; i++) 3218 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3219 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3220 3221 /* ME */ 3222 fw_data = (const __le32 *) 3223 (adev->gfx.me_fw->data + 3224 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3225 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3226 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3227 for (i = 0; i < fw_size; i++) 3228 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3229 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3230 3231 return 0; 3232 } 3233 3234 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3235 { 3236 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3237 const struct cs_section_def *sect = NULL; 3238 const struct cs_extent_def *ext = NULL; 3239 int r, i, tmp; 3240 3241 /* init the CP */ 3242 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3243 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 3244 3245 gfx_v9_0_cp_gfx_enable(adev, true); 3246 3247 r = amdgpu_ring_alloc(ring, 
gfx_v9_0_get_csb_size(adev) + 4 + 3); 3248 if (r) { 3249 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3250 return r; 3251 } 3252 3253 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3254 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3255 3256 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3257 amdgpu_ring_write(ring, 0x80000000); 3258 amdgpu_ring_write(ring, 0x80000000); 3259 3260 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 3261 for (ext = sect->section; ext->extent != NULL; ++ext) { 3262 if (sect->id == SECT_CONTEXT) { 3263 amdgpu_ring_write(ring, 3264 PACKET3(PACKET3_SET_CONTEXT_REG, 3265 ext->reg_count)); 3266 amdgpu_ring_write(ring, 3267 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 3268 for (i = 0; i < ext->reg_count; i++) 3269 amdgpu_ring_write(ring, ext->extent[i]); 3270 } 3271 } 3272 } 3273 3274 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3275 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3276 3277 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3278 amdgpu_ring_write(ring, 0); 3279 3280 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3281 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3282 amdgpu_ring_write(ring, 0x8000); 3283 amdgpu_ring_write(ring, 0x8000); 3284 3285 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 3286 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 3287 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 3288 amdgpu_ring_write(ring, tmp); 3289 amdgpu_ring_write(ring, 0); 3290 3291 amdgpu_ring_commit(ring); 3292 3293 return 0; 3294 } 3295 3296 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 3297 { 3298 struct amdgpu_ring *ring; 3299 u32 tmp; 3300 u32 rb_bufsz; 3301 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3302 3303 /* Set the write pointer delay */ 3304 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 3305 3306 /* set the RB to use vmid 0 */ 3307 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 3308 3309 /* Set ring buffer size */ 3310 ring = &adev->gfx.gfx_ring[0]; 3311 rb_bufsz = order_base_2(ring->ring_size / 8); 3312 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3313 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3314 #ifdef __BIG_ENDIAN 3315 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 3316 #endif 3317 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3318 3319 /* Initialize the ring buffer's write pointers */ 3320 ring->wptr = 0; 3321 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3322 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3323 3324 /* set the wb address wether it's enabled or not */ 3325 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3326 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3327 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3328 3329 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3330 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 3331 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 3332 3333 mdelay(1); 3334 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3335 3336 rb_addr = ring->gpu_addr >> 8; 3337 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 3338 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3339 3340 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3341 if (ring->use_doorbell) { 3342 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 
3343 DOORBELL_OFFSET, ring->doorbell_index); 3344 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3345 DOORBELL_EN, 1); 3346 } else { 3347 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3348 } 3349 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3350 3351 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3352 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3353 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3354 3355 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3356 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3357 3358 3359 /* start the ring */ 3360 gfx_v9_0_cp_gfx_start(adev); 3361 ring->sched.ready = true; 3362 3363 return 0; 3364 } 3365 3366 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3367 { 3368 if (enable) { 3369 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3370 } else { 3371 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3372 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3373 adev->gfx.kiq.ring.sched.ready = false; 3374 } 3375 udelay(50); 3376 } 3377 3378 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3379 { 3380 const struct gfx_firmware_header_v1_0 *mec_hdr; 3381 const __le32 *fw_data; 3382 unsigned i; 3383 u32 tmp; 3384 3385 if (!adev->gfx.mec_fw) 3386 return -EINVAL; 3387 3388 gfx_v9_0_cp_compute_enable(adev, false); 3389 3390 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3391 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3392 3393 fw_data = (const __le32 *) 3394 (adev->gfx.mec_fw->data + 3395 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3396 tmp = 0; 3397 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3398 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3399 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3400 3401 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3402 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3403 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3404 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3405 3406 /* MEC1 */ 3407 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3408 mec_hdr->jt_offset); 3409 for (i = 0; i < mec_hdr->jt_size; i++) 3410 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3411 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3412 3413 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3414 adev->gfx.mec_fw_version); 3415 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. 
*/ 3416 3417 return 0; 3418 } 3419 3420 /* KIQ functions */ 3421 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3422 { 3423 uint32_t tmp; 3424 struct amdgpu_device *adev = ring->adev; 3425 3426 /* tell RLC which is KIQ queue */ 3427 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3428 tmp &= 0xffffff00; 3429 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3430 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3431 tmp |= 0x80; 3432 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3433 } 3434 3435 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) 3436 { 3437 struct amdgpu_device *adev = ring->adev; 3438 3439 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 3440 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { 3441 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; 3442 mqd->cp_hqd_queue_priority = 3443 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; 3444 } 3445 } 3446 } 3447 3448 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3449 { 3450 struct amdgpu_device *adev = ring->adev; 3451 struct v9_mqd *mqd = ring->mqd_ptr; 3452 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3453 uint32_t tmp; 3454 3455 mqd->header = 0xC0310800; 3456 mqd->compute_pipelinestat_enable = 0x00000001; 3457 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3458 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3459 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3460 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3461 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3462 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3463 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3464 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3465 mqd->compute_misc_reserved = 0x00000003; 3466 3467 mqd->dynamic_cu_mask_addr_lo = 3468 lower_32_bits(ring->mqd_gpu_addr 3469 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3470 mqd->dynamic_cu_mask_addr_hi = 3471 upper_32_bits(ring->mqd_gpu_addr 3472 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3473 3474 eop_base_addr = ring->eop_gpu_addr >> 8; 3475 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3476 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3477 3478 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3479 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3480 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3481 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3482 3483 mqd->cp_hqd_eop_control = tmp; 3484 3485 /* enable doorbell? 
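	 * Note: gfx_v9_0_mqd_init() only fills in the MQD image; the doorbell
	 * control value captured here is committed to the CP_HQD_* registers
	 * later, by gfx_v9_0_kiq_init_register() for the KIQ or when a compute
	 * queue is mapped through the KIQ (amdgpu_gfx_enable_kcq()).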
*/ 3486 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3487 3488 if (ring->use_doorbell) { 3489 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3490 DOORBELL_OFFSET, ring->doorbell_index); 3491 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3492 DOORBELL_EN, 1); 3493 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3494 DOORBELL_SOURCE, 0); 3495 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3496 DOORBELL_HIT, 0); 3497 } else { 3498 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3499 DOORBELL_EN, 0); 3500 } 3501 3502 mqd->cp_hqd_pq_doorbell_control = tmp; 3503 3504 /* disable the queue if it's active */ 3505 ring->wptr = 0; 3506 mqd->cp_hqd_dequeue_request = 0; 3507 mqd->cp_hqd_pq_rptr = 0; 3508 mqd->cp_hqd_pq_wptr_lo = 0; 3509 mqd->cp_hqd_pq_wptr_hi = 0; 3510 3511 /* set the pointer to the MQD */ 3512 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3513 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3514 3515 /* set MQD vmid to 0 */ 3516 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3517 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3518 mqd->cp_mqd_control = tmp; 3519 3520 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3521 hqd_gpu_addr = ring->gpu_addr >> 8; 3522 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3523 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3524 3525 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3526 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3527 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3528 (order_base_2(ring->ring_size / 4) - 1)); 3529 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3530 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3531 #ifdef __BIG_ENDIAN 3532 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3533 #endif 3534 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3535 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3536 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3537 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3538 mqd->cp_hqd_pq_control = tmp; 3539 3540 /* set the wb address whether it's enabled or not */ 3541 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3542 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3543 mqd->cp_hqd_pq_rptr_report_addr_hi = 3544 upper_32_bits(wb_gpu_addr) & 0xffff; 3545 3546 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3547 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3548 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3549 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3550 3551 tmp = 0; 3552 /* enable the doorbell if requested */ 3553 if (ring->use_doorbell) { 3554 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3555 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3556 DOORBELL_OFFSET, ring->doorbell_index); 3557 3558 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3559 DOORBELL_EN, 1); 3560 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3561 DOORBELL_SOURCE, 0); 3562 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3563 DOORBELL_HIT, 0); 3564 } 3565 3566 mqd->cp_hqd_pq_doorbell_control = tmp; 3567 3568 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3569 ring->wptr = 0; 3570 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3571 3572 /* set the vmid for the queue */ 3573 mqd->cp_hqd_vmid = 0; 3574 3575 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3576 
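	/*
	 * Read-modify-write: only PRELOAD_SIZE is overridden below, so the
	 * remaining CP_HQD_PERSISTENT_STATE fields keep their current values.
	 * 0x53 is the preload size used for every gfx9 KIQ/compute queue that
	 * goes through this MQD init path.
	 */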
tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3577 mqd->cp_hqd_persistent_state = tmp; 3578 3579 /* set MIN_IB_AVAIL_SIZE */ 3580 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3581 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3582 mqd->cp_hqd_ib_control = tmp; 3583 3584 /* set static priority for a queue/ring */ 3585 gfx_v9_0_mqd_set_priority(ring, mqd); 3586 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); 3587 3588 /* map_queues packet doesn't need activate the queue, 3589 * so only kiq need set this field. 3590 */ 3591 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 3592 mqd->cp_hqd_active = 1; 3593 3594 return 0; 3595 } 3596 3597 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3598 { 3599 struct amdgpu_device *adev = ring->adev; 3600 struct v9_mqd *mqd = ring->mqd_ptr; 3601 int j; 3602 3603 /* disable wptr polling */ 3604 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3605 3606 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3607 mqd->cp_hqd_eop_base_addr_lo); 3608 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3609 mqd->cp_hqd_eop_base_addr_hi); 3610 3611 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3612 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3613 mqd->cp_hqd_eop_control); 3614 3615 /* enable doorbell? */ 3616 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3617 mqd->cp_hqd_pq_doorbell_control); 3618 3619 /* disable the queue if it's active */ 3620 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3621 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3622 for (j = 0; j < adev->usec_timeout; j++) { 3623 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3624 break; 3625 udelay(1); 3626 } 3627 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3628 mqd->cp_hqd_dequeue_request); 3629 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3630 mqd->cp_hqd_pq_rptr); 3631 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3632 mqd->cp_hqd_pq_wptr_lo); 3633 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3634 mqd->cp_hqd_pq_wptr_hi); 3635 } 3636 3637 /* set the pointer to the MQD */ 3638 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3639 mqd->cp_mqd_base_addr_lo); 3640 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3641 mqd->cp_mqd_base_addr_hi); 3642 3643 /* set MQD vmid to 0 */ 3644 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3645 mqd->cp_mqd_control); 3646 3647 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3648 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3649 mqd->cp_hqd_pq_base_lo); 3650 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3651 mqd->cp_hqd_pq_base_hi); 3652 3653 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3654 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3655 mqd->cp_hqd_pq_control); 3656 3657 /* set the wb address whether it's enabled or not */ 3658 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3659 mqd->cp_hqd_pq_rptr_report_addr_lo); 3660 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3661 mqd->cp_hqd_pq_rptr_report_addr_hi); 3662 3663 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3664 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3665 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3666 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3667 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3668 3669 /* enable the doorbell if requested */ 3670 if (ring->use_doorbell) { 3671 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3672 (adev->doorbell_index.kiq * 2) << 2); 3673 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3674 
(adev->doorbell_index.userqueue_end * 2) << 2); 3675 } 3676 3677 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3678 mqd->cp_hqd_pq_doorbell_control); 3679 3680 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3681 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3682 mqd->cp_hqd_pq_wptr_lo); 3683 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3684 mqd->cp_hqd_pq_wptr_hi); 3685 3686 /* set the vmid for the queue */ 3687 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3688 3689 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3690 mqd->cp_hqd_persistent_state); 3691 3692 /* activate the queue */ 3693 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3694 mqd->cp_hqd_active); 3695 3696 if (ring->use_doorbell) 3697 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3698 3699 return 0; 3700 } 3701 3702 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3703 { 3704 struct amdgpu_device *adev = ring->adev; 3705 int j; 3706 3707 /* disable the queue if it's active */ 3708 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3709 3710 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3711 3712 for (j = 0; j < adev->usec_timeout; j++) { 3713 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3714 break; 3715 udelay(1); 3716 } 3717 3718 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3719 DRM_DEBUG("KIQ dequeue request failed.\n"); 3720 3721 /* Manual disable if dequeue request times out */ 3722 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3723 } 3724 3725 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3726 0); 3727 } 3728 3729 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3730 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3731 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3732 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3733 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3734 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3735 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3736 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3737 3738 return 0; 3739 } 3740 3741 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3742 { 3743 struct amdgpu_device *adev = ring->adev; 3744 struct v9_mqd *mqd = ring->mqd_ptr; 3745 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3746 struct v9_mqd *tmp_mqd; 3747 3748 gfx_v9_0_kiq_setting(ring); 3749 3750 /* GPU could be in bad state during probe, driver trigger the reset 3751 * after load the SMU, in this case , the mqd is not be initialized. 3752 * driver need to re-init the mqd. 
3753 * check mqd->cp_hqd_pq_control since this value should not be 0 3754 */ 3755 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; 3756 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){ 3757 /* for GPU_RESET case , reset MQD to a clean status */ 3758 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3759 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3760 3761 /* reset ring buffer */ 3762 ring->wptr = 0; 3763 amdgpu_ring_clear_ring(ring); 3764 3765 mutex_lock(&adev->srbm_mutex); 3766 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3767 gfx_v9_0_kiq_init_register(ring); 3768 soc15_grbm_select(adev, 0, 0, 0, 0); 3769 mutex_unlock(&adev->srbm_mutex); 3770 } else { 3771 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3772 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3773 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3774 mutex_lock(&adev->srbm_mutex); 3775 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3776 gfx_v9_0_mqd_init(ring); 3777 gfx_v9_0_kiq_init_register(ring); 3778 soc15_grbm_select(adev, 0, 0, 0, 0); 3779 mutex_unlock(&adev->srbm_mutex); 3780 3781 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3782 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3783 } 3784 3785 return 0; 3786 } 3787 3788 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) 3789 { 3790 struct amdgpu_device *adev = ring->adev; 3791 struct v9_mqd *mqd = ring->mqd_ptr; 3792 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3793 struct v9_mqd *tmp_mqd; 3794 3795 /* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control 3796 * is not be initialized before 3797 */ 3798 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; 3799 3800 if (!tmp_mqd->cp_hqd_pq_control || 3801 (!amdgpu_in_reset(adev) && !adev->in_suspend)) { 3802 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3803 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3804 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3805 mutex_lock(&adev->srbm_mutex); 3806 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3807 gfx_v9_0_mqd_init(ring); 3808 soc15_grbm_select(adev, 0, 0, 0, 0); 3809 mutex_unlock(&adev->srbm_mutex); 3810 3811 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3812 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3813 } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 3814 /* reset MQD to a clean status */ 3815 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3816 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3817 3818 /* reset ring buffer */ 3819 ring->wptr = 0; 3820 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0); 3821 amdgpu_ring_clear_ring(ring); 3822 } else { 3823 amdgpu_ring_clear_ring(ring); 3824 } 3825 3826 return 0; 3827 } 3828 3829 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3830 { 3831 struct amdgpu_ring *ring; 3832 int r; 3833 3834 ring = &adev->gfx.kiq.ring; 3835 3836 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3837 if (unlikely(r != 0)) 3838 return r; 3839 3840 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3841 if (unlikely(r != 0)) 3842 return r; 3843 3844 gfx_v9_0_kiq_init_queue(ring); 3845 amdgpu_bo_kunmap(ring->mqd_obj); 3846 ring->mqd_ptr = NULL; 3847 amdgpu_bo_unreserve(ring->mqd_obj); 3848 ring->sched.ready = true; 3849 return 0; 3850 } 3851 3852 static int gfx_v9_0_kcq_resume(struct 
amdgpu_device *adev) 3853 { 3854 struct amdgpu_ring *ring = NULL; 3855 int r = 0, i; 3856 3857 gfx_v9_0_cp_compute_enable(adev, true); 3858 3859 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3860 ring = &adev->gfx.compute_ring[i]; 3861 3862 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3863 if (unlikely(r != 0)) 3864 goto done; 3865 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3866 if (!r) { 3867 r = gfx_v9_0_kcq_init_queue(ring); 3868 amdgpu_bo_kunmap(ring->mqd_obj); 3869 ring->mqd_ptr = NULL; 3870 } 3871 amdgpu_bo_unreserve(ring->mqd_obj); 3872 if (r) 3873 goto done; 3874 } 3875 3876 r = amdgpu_gfx_enable_kcq(adev); 3877 done: 3878 return r; 3879 } 3880 3881 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3882 { 3883 int r, i; 3884 struct amdgpu_ring *ring; 3885 3886 if (!(adev->flags & AMD_IS_APU)) 3887 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3888 3889 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3890 if (adev->gfx.num_gfx_rings) { 3891 /* legacy firmware loading */ 3892 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3893 if (r) 3894 return r; 3895 } 3896 3897 r = gfx_v9_0_cp_compute_load_microcode(adev); 3898 if (r) 3899 return r; 3900 } 3901 3902 r = gfx_v9_0_kiq_resume(adev); 3903 if (r) 3904 return r; 3905 3906 if (adev->gfx.num_gfx_rings) { 3907 r = gfx_v9_0_cp_gfx_resume(adev); 3908 if (r) 3909 return r; 3910 } 3911 3912 r = gfx_v9_0_kcq_resume(adev); 3913 if (r) 3914 return r; 3915 3916 if (adev->gfx.num_gfx_rings) { 3917 ring = &adev->gfx.gfx_ring[0]; 3918 r = amdgpu_ring_test_helper(ring); 3919 if (r) 3920 return r; 3921 } 3922 3923 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3924 ring = &adev->gfx.compute_ring[i]; 3925 amdgpu_ring_test_helper(ring); 3926 } 3927 3928 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3929 3930 return 0; 3931 } 3932 3933 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) 3934 { 3935 u32 tmp; 3936 3937 if (adev->asic_type != CHIP_ARCTURUS) 3938 return; 3939 3940 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); 3941 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, 3942 adev->df.hash_status.hash_64k); 3943 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, 3944 adev->df.hash_status.hash_2m); 3945 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, 3946 adev->df.hash_status.hash_1g); 3947 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); 3948 } 3949 3950 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3951 { 3952 if (adev->gfx.num_gfx_rings) 3953 gfx_v9_0_cp_gfx_enable(adev, enable); 3954 gfx_v9_0_cp_compute_enable(adev, enable); 3955 } 3956 3957 static int gfx_v9_0_hw_init(void *handle) 3958 { 3959 int r; 3960 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3961 3962 if (!amdgpu_sriov_vf(adev)) 3963 gfx_v9_0_init_golden_registers(adev); 3964 3965 gfx_v9_0_constants_init(adev); 3966 3967 gfx_v9_0_init_tcp_config(adev); 3968 3969 r = adev->gfx.rlc.funcs->resume(adev); 3970 if (r) 3971 return r; 3972 3973 r = gfx_v9_0_cp_resume(adev); 3974 if (r) 3975 return r; 3976 3977 if (adev->asic_type == CHIP_ALDEBARAN) 3978 gfx_v9_4_2_set_power_brake_sequence(adev); 3979 3980 return r; 3981 } 3982 3983 static int gfx_v9_0_hw_fini(void *handle) 3984 { 3985 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3986 3987 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3988 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3989 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3990 3991 /* DF freeze and kcq disable will fail */ 3992 if 
(!amdgpu_ras_intr_triggered()) 3993 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3994 amdgpu_gfx_disable_kcq(adev); 3995 3996 if (amdgpu_sriov_vf(adev)) { 3997 gfx_v9_0_cp_gfx_enable(adev, false); 3998 /* must disable polling for SRIOV when hw finished, otherwise 3999 * CPC engine may still keep fetching WB address which is already 4000 * invalid after sw finished and trigger DMAR reading error in 4001 * hypervisor side. 4002 */ 4003 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4004 return 0; 4005 } 4006 4007 /* Use deinitialize sequence from CAIL when unbinding device from driver, 4008 * otherwise KIQ is hanging when binding back 4009 */ 4010 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 4011 mutex_lock(&adev->srbm_mutex); 4012 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 4013 adev->gfx.kiq.ring.pipe, 4014 adev->gfx.kiq.ring.queue, 0); 4015 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 4016 soc15_grbm_select(adev, 0, 0, 0, 0); 4017 mutex_unlock(&adev->srbm_mutex); 4018 } 4019 4020 gfx_v9_0_cp_enable(adev, false); 4021 4022 /* Skip suspend with A+A reset */ 4023 if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) { 4024 dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n"); 4025 return 0; 4026 } 4027 4028 adev->gfx.rlc.funcs->stop(adev); 4029 return 0; 4030 } 4031 4032 static int gfx_v9_0_suspend(void *handle) 4033 { 4034 return gfx_v9_0_hw_fini(handle); 4035 } 4036 4037 static int gfx_v9_0_resume(void *handle) 4038 { 4039 return gfx_v9_0_hw_init(handle); 4040 } 4041 4042 static bool gfx_v9_0_is_idle(void *handle) 4043 { 4044 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4045 4046 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 4047 GRBM_STATUS, GUI_ACTIVE)) 4048 return false; 4049 else 4050 return true; 4051 } 4052 4053 static int gfx_v9_0_wait_for_idle(void *handle) 4054 { 4055 unsigned i; 4056 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4057 4058 for (i = 0; i < adev->usec_timeout; i++) { 4059 if (gfx_v9_0_is_idle(handle)) 4060 return 0; 4061 udelay(1); 4062 } 4063 return -ETIMEDOUT; 4064 } 4065 4066 static int gfx_v9_0_soft_reset(void *handle) 4067 { 4068 u32 grbm_soft_reset = 0; 4069 u32 tmp; 4070 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4071 4072 /* GRBM_STATUS */ 4073 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 4074 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4075 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4076 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4077 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4078 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4079 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 4080 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4081 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4082 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4083 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4084 } 4085 4086 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4087 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4088 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4089 } 4090 4091 /* GRBM_STATUS2 */ 4092 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 4093 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 4094 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4095 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4096 4097 4098 if (grbm_soft_reset) { 4099 /* stop the rlc */ 4100 adev->gfx.rlc.funcs->stop(adev); 4101 4102 if (adev->gfx.num_gfx_rings) 4103 /* Disable GFX parsing/prefetching */ 4104 
gfx_v9_0_cp_gfx_enable(adev, false); 4105 4106 /* Disable MEC parsing/prefetching */ 4107 gfx_v9_0_cp_compute_enable(adev, false); 4108 4109 if (grbm_soft_reset) { 4110 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4111 tmp |= grbm_soft_reset; 4112 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4113 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4114 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4115 4116 udelay(50); 4117 4118 tmp &= ~grbm_soft_reset; 4119 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4120 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4121 } 4122 4123 /* Wait a little for things to settle down */ 4124 udelay(50); 4125 } 4126 return 0; 4127 } 4128 4129 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) 4130 { 4131 signed long r, cnt = 0; 4132 unsigned long flags; 4133 uint32_t seq, reg_val_offs = 0; 4134 uint64_t value = 0; 4135 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 4136 struct amdgpu_ring *ring = &kiq->ring; 4137 4138 BUG_ON(!ring->funcs->emit_rreg); 4139 4140 spin_lock_irqsave(&kiq->ring_lock, flags); 4141 if (amdgpu_device_wb_get(adev, &reg_val_offs)) { 4142 pr_err("critical bug! too many kiq readers\n"); 4143 goto failed_unlock; 4144 } 4145 amdgpu_ring_alloc(ring, 32); 4146 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 4147 amdgpu_ring_write(ring, 9 | /* src: register*/ 4148 (5 << 8) | /* dst: memory */ 4149 (1 << 16) | /* count sel */ 4150 (1 << 20)); /* write confirm */ 4151 amdgpu_ring_write(ring, 0); 4152 amdgpu_ring_write(ring, 0); 4153 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 4154 reg_val_offs * 4)); 4155 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 4156 reg_val_offs * 4)); 4157 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); 4158 if (r) 4159 goto failed_undo; 4160 4161 amdgpu_ring_commit(ring); 4162 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4163 4164 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4165 4166 /* don't wait anymore for gpu reset case because this way may 4167 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg 4168 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will 4169 * never return if we keep waiting in virt_kiq_rreg, which causes 4170 * gpu_recover() to hang there.
4171 * 4172 * also don't wait anymore for IRQ context 4173 * */ 4174 if (r < 1 && (amdgpu_in_reset(adev))) 4175 goto failed_kiq_read; 4176 4177 might_sleep(); 4178 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 4179 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 4180 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4181 } 4182 4183 if (cnt > MAX_KIQ_REG_TRY) 4184 goto failed_kiq_read; 4185 4186 mb(); 4187 value = (uint64_t)adev->wb.wb[reg_val_offs] | 4188 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL; 4189 amdgpu_device_wb_free(adev, reg_val_offs); 4190 return value; 4191 4192 failed_undo: 4193 amdgpu_ring_undo(ring); 4194 failed_unlock: 4195 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4196 failed_kiq_read: 4197 if (reg_val_offs) 4198 amdgpu_device_wb_free(adev, reg_val_offs); 4199 pr_err("failed to read gpu clock\n"); 4200 return ~0; 4201 } 4202 4203 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4204 { 4205 uint64_t clock; 4206 4207 amdgpu_gfx_off_ctrl(adev, false); 4208 mutex_lock(&adev->gfx.gpu_clock_mutex); 4209 if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { 4210 clock = gfx_v9_0_kiq_read_clock(adev); 4211 } else { 4212 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4213 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4214 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4215 } 4216 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4217 amdgpu_gfx_off_ctrl(adev, true); 4218 return clock; 4219 } 4220 4221 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4222 uint32_t vmid, 4223 uint32_t gds_base, uint32_t gds_size, 4224 uint32_t gws_base, uint32_t gws_size, 4225 uint32_t oa_base, uint32_t oa_size) 4226 { 4227 struct amdgpu_device *adev = ring->adev; 4228 4229 /* GDS Base */ 4230 gfx_v9_0_write_data_to_reg(ring, 0, false, 4231 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4232 gds_base); 4233 4234 /* GDS Size */ 4235 gfx_v9_0_write_data_to_reg(ring, 0, false, 4236 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4237 gds_size); 4238 4239 /* GWS */ 4240 gfx_v9_0_write_data_to_reg(ring, 0, false, 4241 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4242 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4243 4244 /* OA */ 4245 gfx_v9_0_write_data_to_reg(ring, 0, false, 4246 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4247 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4248 } 4249 4250 static const u32 vgpr_init_compute_shader[] = 4251 { 4252 0xb07c0000, 0xbe8000ff, 4253 0x000000f8, 0xbf110800, 4254 0x7e000280, 0x7e020280, 4255 0x7e040280, 0x7e060280, 4256 0x7e080280, 0x7e0a0280, 4257 0x7e0c0280, 0x7e0e0280, 4258 0x80808800, 0xbe803200, 4259 0xbf84fff5, 0xbf9c0000, 4260 0xd28c0001, 0x0001007f, 4261 0xd28d0001, 0x0002027e, 4262 0x10020288, 0xb8810904, 4263 0xb7814000, 0xd1196a01, 4264 0x00000301, 0xbe800087, 4265 0xbefc00c1, 0xd89c4000, 4266 0x00020201, 0xd89cc080, 4267 0x00040401, 0x320202ff, 4268 0x00000800, 0x80808100, 4269 0xbf84fff8, 0x7e020280, 4270 0xbf810000, 0x00000000, 4271 }; 4272 4273 static const u32 sgpr_init_compute_shader[] = 4274 { 4275 0xb07c0000, 0xbe8000ff, 4276 0x0000005f, 0xbee50080, 4277 0xbe812c65, 0xbe822c65, 4278 0xbe832c65, 0xbe842c65, 4279 0xbe852c65, 0xb77c0005, 4280 0x80808500, 0xbf84fff8, 4281 0xbe800080, 0xbf810000, 4282 }; 4283 4284 static const u32 vgpr_init_compute_shader_arcturus[] = { 4285 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 4286 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 
0x18000080, 4287 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 4288 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 4289 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 4290 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 4291 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 4292 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 4293 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 4294 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 4295 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 4296 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 4297 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 4298 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 4299 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 4300 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 4301 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 4302 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 4303 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 4304 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 4305 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 4306 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 4307 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 4308 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 4309 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 4310 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 4311 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 4312 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 4313 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 4314 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 4315 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 4316 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 4317 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 4318 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 4319 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 4320 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 4321 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 4322 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 4323 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 4324 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 4325 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 4326 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 4327 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 4328 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 4329 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 4330 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 4331 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 4332 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 
4333 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 4334 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 4335 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 4336 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 4337 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 4338 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 4339 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 4340 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 4341 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 4342 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 4343 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 4344 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 4345 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 4346 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 4347 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 4348 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 4349 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 4350 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 4351 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 4352 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 4353 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 4354 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 4355 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 4356 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 4357 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 4358 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 4359 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 4360 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 4361 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 4362 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 4363 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 4364 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 4365 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 4366 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 4367 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 4368 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 4369 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 4370 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 4371 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 4372 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 4373 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904, 4374 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 4375 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 4376 0xbf84fff8, 0xbf810000, 4377 }; 4378 4379 /* When below register arrays changed, please update gpr_reg_size, 4380 and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, 4381 to cover all gfx9 ASICs */ 4382 static const 
struct soc15_reg_entry vgpr_init_regs[] = { 4383 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4384 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4385 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4386 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4387 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, 4388 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4389 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4390 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4391 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4392 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4393 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4394 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4395 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4396 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4397 }; 4398 4399 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = { 4400 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4401 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4402 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4403 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4404 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf }, 4405 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4406 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4407 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4408 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4409 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4410 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4411 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4412 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4413 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4414 }; 4415 4416 static const struct soc15_reg_entry sgpr1_init_regs[] = { 4417 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4418 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4419 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4420 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4421 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4422 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4423 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, 4424 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, 4425 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, 4426 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, 4427 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, 4428 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, 4429 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, 4430 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, 4431 }; 4432 4433 static const struct soc15_reg_entry sgpr2_init_regs[] = { 4434 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4435 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4436 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4437 { 
SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4438 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4439 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4440 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, 4441 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, 4442 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, 4443 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, 4444 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, 4445 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, 4446 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, 4447 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, 4448 }; 4449 4450 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { 4451 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4452 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4453 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4454 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4455 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4456 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4457 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4458 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4459 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4460 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4461 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4462 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4463 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4464 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4465 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4466 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4467 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4468 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4469 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4470 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4471 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, 4472 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4473 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4474 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4475 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4476 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4477 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4478 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4479 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4480 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4481 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 4482 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4483 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4484 }; 4485 4486 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4487 { 4488 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4489 int i, r; 4490 4491 /* only support when RAS is enabled */ 4492 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4493 return 0; 4494 4495 r = amdgpu_ring_alloc(ring, 7); 4496 if (r) { 4497 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4498 ring->name, r); 4499 return r; 4500 } 4501 4502 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4503 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4504 4505 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4506 amdgpu_ring_write(ring, 
(PACKET3_DMA_DATA_CP_SYNC | 4507 PACKET3_DMA_DATA_DST_SEL(1) | 4508 PACKET3_DMA_DATA_SRC_SEL(2) | 4509 PACKET3_DMA_DATA_ENGINE(0))); 4510 amdgpu_ring_write(ring, 0); 4511 amdgpu_ring_write(ring, 0); 4512 amdgpu_ring_write(ring, 0); 4513 amdgpu_ring_write(ring, 0); 4514 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4515 adev->gds.gds_size); 4516 4517 amdgpu_ring_commit(ring); 4518 4519 for (i = 0; i < adev->usec_timeout; i++) { 4520 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4521 break; 4522 udelay(1); 4523 } 4524 4525 if (i >= adev->usec_timeout) 4526 r = -ETIMEDOUT; 4527 4528 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4529 4530 return r; 4531 } 4532 4533 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4534 { 4535 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4536 struct amdgpu_ib ib; 4537 struct dma_fence *f = NULL; 4538 int r, i; 4539 unsigned total_size, vgpr_offset, sgpr_offset; 4540 u64 gpu_addr; 4541 4542 int compute_dim_x = adev->gfx.config.max_shader_engines * 4543 adev->gfx.config.max_cu_per_sh * 4544 adev->gfx.config.max_sh_per_se; 4545 int sgpr_work_group_size = 5; 4546 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6; 4547 int vgpr_init_shader_size; 4548 const u32 *vgpr_init_shader_ptr; 4549 const struct soc15_reg_entry *vgpr_init_regs_ptr; 4550 4551 /* only support when RAS is enabled */ 4552 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4553 return 0; 4554 4555 /* bail if the compute ring is not ready */ 4556 if (!ring->sched.ready) 4557 return 0; 4558 4559 if (adev->asic_type == CHIP_ARCTURUS || 4560 adev->asic_type == CHIP_ALDEBARAN) { 4561 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; 4562 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); 4563 vgpr_init_regs_ptr = vgpr_init_regs_arcturus; 4564 } else { 4565 vgpr_init_shader_ptr = vgpr_init_compute_shader; 4566 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader); 4567 vgpr_init_regs_ptr = vgpr_init_regs; 4568 } 4569 4570 total_size = 4571 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ 4572 total_size += 4573 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ 4574 total_size += 4575 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ 4576 total_size = ALIGN(total_size, 256); 4577 vgpr_offset = total_size; 4578 total_size += ALIGN(vgpr_init_shader_size, 256); 4579 sgpr_offset = total_size; 4580 total_size += sizeof(sgpr_init_compute_shader); 4581 4582 /* allocate an indirect buffer to put the commands in */ 4583 memset(&ib, 0, sizeof(ib)); 4584 r = amdgpu_ib_get(adev, NULL, total_size, 4585 AMDGPU_IB_POOL_DIRECT, &ib); 4586 if (r) { 4587 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4588 return r; 4589 } 4590 4591 /* load the compute shaders */ 4592 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++) 4593 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i]; 4594 4595 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4596 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4597 4598 /* init the ib length to 0 */ 4599 ib.length_dw = 0; 4600 4601 /* VGPR */ 4602 /* write the register state for the compute dispatch */ 4603 for (i = 0; i < gpr_reg_size; i++) { 4604 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4605 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i]) 4606 - PACKET3_SET_SH_REG_START; 4607 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value; 4608 } 4609 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4610 gpu_addr 
= (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4611 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4612 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4613 - PACKET3_SET_SH_REG_START; 4614 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4615 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4616 4617 /* write dispatch packet */ 4618 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4619 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */ 4620 ib.ptr[ib.length_dw++] = 1; /* y */ 4621 ib.ptr[ib.length_dw++] = 1; /* z */ 4622 ib.ptr[ib.length_dw++] = 4623 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4624 4625 /* write CS partial flush packet */ 4626 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4627 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4628 4629 /* SGPR1 */ 4630 /* write the register state for the compute dispatch */ 4631 for (i = 0; i < gpr_reg_size; i++) { 4632 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4633 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) 4634 - PACKET3_SET_SH_REG_START; 4635 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; 4636 } 4637 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4638 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4639 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4640 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4641 - PACKET3_SET_SH_REG_START; 4642 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4643 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4644 4645 /* write dispatch packet */ 4646 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4647 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4648 ib.ptr[ib.length_dw++] = 1; /* y */ 4649 ib.ptr[ib.length_dw++] = 1; /* z */ 4650 ib.ptr[ib.length_dw++] = 4651 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4652 4653 /* write CS partial flush packet */ 4654 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4655 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4656 4657 /* SGPR2 */ 4658 /* write the register state for the compute dispatch */ 4659 for (i = 0; i < gpr_reg_size; i++) { 4660 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4661 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) 4662 - PACKET3_SET_SH_REG_START; 4663 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; 4664 } 4665 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4666 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4667 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4668 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4669 - PACKET3_SET_SH_REG_START; 4670 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4671 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4672 4673 /* write dispatch packet */ 4674 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4675 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4676 ib.ptr[ib.length_dw++] = 1; /* y */ 4677 ib.ptr[ib.length_dw++] = 1; /* z */ 4678 ib.ptr[ib.length_dw++] = 4679 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4680 4681 /* write CS partial flush packet */ 4682 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4683 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4684 4685 /* schedule the ib on the ring */ 4686 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
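/* Descriptive note on the IB just submitted: it carries both the packet stream
 * (three SET_SH_REG + DISPATCH_DIRECT + CS-partial-flush sequences for the VGPR,
 * SGPR1 and SGPR2 passes) and, at vgpr_offset/sgpr_offset, the compute shaders
 * those dispatches execute; the fence f below is the only completion signal the
 * workaround waits on.
 */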
4687 if (r) { 4688 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 4689 goto fail; 4690 } 4691 4692 /* wait for the GPU to finish processing the IB */ 4693 r = dma_fence_wait(f, false); 4694 if (r) { 4695 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 4696 goto fail; 4697 } 4698 4699 fail: 4700 amdgpu_ib_free(adev, &ib, NULL); 4701 dma_fence_put(f); 4702 4703 return r; 4704 } 4705 4706 static int gfx_v9_0_early_init(void *handle) 4707 { 4708 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4709 4710 if (adev->asic_type == CHIP_ARCTURUS || 4711 adev->asic_type == CHIP_ALDEBARAN) 4712 adev->gfx.num_gfx_rings = 0; 4713 else 4714 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 4715 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 4716 AMDGPU_MAX_COMPUTE_RINGS); 4717 gfx_v9_0_set_kiq_pm4_funcs(adev); 4718 gfx_v9_0_set_ring_funcs(adev); 4719 gfx_v9_0_set_irq_funcs(adev); 4720 gfx_v9_0_set_gds_init(adev); 4721 gfx_v9_0_set_rlc_funcs(adev); 4722 4723 return 0; 4724 } 4725 4726 static int gfx_v9_0_ecc_late_init(void *handle) 4727 { 4728 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4729 int r; 4730 4731 /* 4732 * Temp workaround to fix the issue that CP firmware fails to 4733 * update read pointer when CPDMA is writing clearing operation 4734 * to GDS in suspend/resume sequence on several cards. So just 4735 * limit this operation in cold boot sequence. 4736 */ 4737 if ((!adev->in_suspend) && 4738 (adev->gds.gds_size)) { 4739 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4740 if (r) 4741 return r; 4742 } 4743 4744 /* requires IBs so do in late init after IB pool is initialized */ 4745 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4746 if (r) 4747 return r; 4748 4749 if (adev->gfx.ras_funcs && 4750 adev->gfx.ras_funcs->ras_late_init) { 4751 r = adev->gfx.ras_funcs->ras_late_init(adev); 4752 if (r) 4753 return r; 4754 } 4755 4756 if (adev->gfx.ras_funcs && 4757 adev->gfx.ras_funcs->enable_watchdog_timer) 4758 adev->gfx.ras_funcs->enable_watchdog_timer(adev); 4759 4760 return 0; 4761 } 4762 4763 static int gfx_v9_0_late_init(void *handle) 4764 { 4765 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4766 int r; 4767 4768 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4769 if (r) 4770 return r; 4771 4772 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4773 if (r) 4774 return r; 4775 4776 r = gfx_v9_0_ecc_late_init(handle); 4777 if (r) 4778 return r; 4779 4780 return 0; 4781 } 4782 4783 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4784 { 4785 uint32_t rlc_setting; 4786 4787 /* if RLC is not enabled, do nothing */ 4788 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4789 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4790 return false; 4791 4792 return true; 4793 } 4794 4795 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 4796 { 4797 uint32_t data; 4798 unsigned i; 4799 4800 data = RLC_SAFE_MODE__CMD_MASK; 4801 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4802 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4803 4804 /* wait for RLC_SAFE_MODE */ 4805 for (i = 0; i < adev->usec_timeout; i++) { 4806 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4807 break; 4808 udelay(1); 4809 } 4810 } 4811 4812 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 4813 { 4814 uint32_t data; 4815 4816 data = RLC_SAFE_MODE__CMD_MASK; 4817 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4818 } 4819 4820 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4821 bool 
enable) 4822 { 4823 amdgpu_gfx_rlc_enter_safe_mode(adev); 4824 4825 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4826 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4827 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4828 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4829 } else { 4830 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4831 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4832 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4833 } 4834 4835 amdgpu_gfx_rlc_exit_safe_mode(adev); 4836 } 4837 4838 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4839 bool enable) 4840 { 4841 /* TODO: double check if we need to perform under safe mode */ 4842 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4843 4844 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4845 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4846 else 4847 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4848 4849 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4850 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4851 else 4852 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4853 4854 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4855 } 4856 4857 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4858 bool enable) 4859 { 4860 uint32_t data, def; 4861 4862 amdgpu_gfx_rlc_enter_safe_mode(adev); 4863 4864 /* It is disabled by HW by default */ 4865 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4866 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4867 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4868 4869 if (adev->asic_type != CHIP_VEGA12) 4870 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4871 4872 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4873 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4874 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4875 4876 /* only for Vega10 & Raven1 */ 4877 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4878 4879 if (def != data) 4880 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4881 4882 /* MGLS is a global flag to control all MGLS in GFX */ 4883 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4884 /* 2 - RLC memory Light sleep */ 4885 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4886 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4887 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4888 if (def != data) 4889 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4890 } 4891 /* 3 - CP memory Light sleep */ 4892 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4893 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4894 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4895 if (def != data) 4896 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4897 } 4898 } 4899 } else { 4900 /* 1 - MGCG_OVERRIDE */ 4901 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4902 4903 if (adev->asic_type != CHIP_VEGA12) 4904 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4905 4906 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4907 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4908 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4909 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4910 4911 if (def != data) 4912 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4913 4914 /* 2 - disable MGLS in RLC */ 4915 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4916 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4917 data &= 
~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4918 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4919 } 4920 4921 /* 3 - disable MGLS in CP */ 4922 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4923 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4924 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4925 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4926 } 4927 } 4928 4929 amdgpu_gfx_rlc_exit_safe_mode(adev); 4930 } 4931 4932 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4933 bool enable) 4934 { 4935 uint32_t data, def; 4936 4937 if (!adev->gfx.num_gfx_rings) 4938 return; 4939 4940 amdgpu_gfx_rlc_enter_safe_mode(adev); 4941 4942 /* Enable 3D CGCG/CGLS */ 4943 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4944 /* write cmd to clear cgcg/cgls ov */ 4945 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4946 /* unset CGCG override */ 4947 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4948 /* update CGCG and CGLS override bits */ 4949 if (def != data) 4950 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4951 4952 /* enable 3Dcgcg FSM(0x0000363f) */ 4953 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4954 4955 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4956 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4957 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4958 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4959 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4960 if (def != data) 4961 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4962 4963 /* set IDLE_POLL_COUNT(0x00900100) */ 4964 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4965 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4966 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4967 if (def != data) 4968 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4969 } else { 4970 /* Disable CGCG/CGLS */ 4971 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4972 /* disable cgcg, cgls should be disabled */ 4973 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4974 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4975 /* disable cgcg and cgls in FSM */ 4976 if (def != data) 4977 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4978 } 4979 4980 amdgpu_gfx_rlc_exit_safe_mode(adev); 4981 } 4982 4983 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4984 bool enable) 4985 { 4986 uint32_t def, data; 4987 4988 amdgpu_gfx_rlc_enter_safe_mode(adev); 4989 4990 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4991 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4992 /* unset CGCG override */ 4993 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4994 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4995 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4996 else 4997 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4998 /* update CGCG and CGLS override bits */ 4999 if (def != data) 5000 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5001 5002 /* enable cgcg FSM(0x0000363F) */ 5003 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5004 5005 if (adev->asic_type == CHIP_ARCTURUS) 5006 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5007 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5008 else 5009 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5010 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5011 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5012 data |= (0x000F << 
RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5013 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5014 if (def != data) 5015 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5016 5017 /* set IDLE_POLL_COUNT(0x00900100) */ 5018 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5019 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5020 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5021 if (def != data) 5022 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5023 } else { 5024 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5025 /* reset CGCG/CGLS bits */ 5026 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5027 /* disable cgcg and cgls in FSM */ 5028 if (def != data) 5029 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5030 } 5031 5032 amdgpu_gfx_rlc_exit_safe_mode(adev); 5033 } 5034 5035 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5036 bool enable) 5037 { 5038 if (enable) { 5039 /* CGCG/CGLS should be enabled after MGCG/MGLS 5040 * === MGCG + MGLS === 5041 */ 5042 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5043 /* === CGCG /CGLS for GFX 3D Only === */ 5044 gfx_v9_0_update_3d_clock_gating(adev, enable); 5045 /* === CGCG + CGLS === */ 5046 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5047 } else { 5048 /* CGCG/CGLS should be disabled before MGCG/MGLS 5049 * === CGCG + CGLS === 5050 */ 5051 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5052 /* === CGCG /CGLS for GFX 3D Only === */ 5053 gfx_v9_0_update_3d_clock_gating(adev, enable); 5054 /* === MGCG + MGLS === */ 5055 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5056 } 5057 return 0; 5058 } 5059 5060 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) 5061 { 5062 u32 reg, data; 5063 5064 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); 5065 if (amdgpu_sriov_is_pp_one_vf(adev)) 5066 data = RREG32_NO_KIQ(reg); 5067 else 5068 data = RREG32(reg); 5069 5070 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; 5071 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5072 5073 if (amdgpu_sriov_is_pp_one_vf(adev)) 5074 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); 5075 else 5076 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); 5077 } 5078 5079 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev, 5080 uint32_t offset, 5081 struct soc15_reg_rlcg *entries, int arr_size) 5082 { 5083 int i; 5084 uint32_t reg; 5085 5086 if (!entries) 5087 return false; 5088 5089 for (i = 0; i < arr_size; i++) { 5090 const struct soc15_reg_rlcg *entry; 5091 5092 entry = &entries[i]; 5093 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; 5094 if (offset == reg) 5095 return true; 5096 } 5097 5098 return false; 5099 } 5100 5101 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset) 5102 { 5103 return gfx_v9_0_check_rlcg_range(adev, offset, 5104 (void *)rlcg_access_gc_9_0, 5105 ARRAY_SIZE(rlcg_access_gc_9_0)); 5106 } 5107 5108 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 5109 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 5110 .set_safe_mode = gfx_v9_0_set_safe_mode, 5111 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 5112 .init = gfx_v9_0_rlc_init, 5113 .get_csb_size = gfx_v9_0_get_csb_size, 5114 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 5115 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 5116 .resume = gfx_v9_0_rlc_resume, 5117 .stop = gfx_v9_0_rlc_stop, 5118 .reset = gfx_v9_0_rlc_reset, 
5119 .start = gfx_v9_0_rlc_start, 5120 .update_spm_vmid = gfx_v9_0_update_spm_vmid, 5121 .rlcg_wreg = gfx_v9_0_rlcg_wreg, 5122 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, 5123 }; 5124 5125 static int gfx_v9_0_set_powergating_state(void *handle, 5126 enum amd_powergating_state state) 5127 { 5128 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5129 bool enable = (state == AMD_PG_STATE_GATE); 5130 5131 switch (adev->asic_type) { 5132 case CHIP_RAVEN: 5133 case CHIP_RENOIR: 5134 if (!enable) 5135 amdgpu_gfx_off_ctrl(adev, false); 5136 5137 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5138 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 5139 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 5140 } else { 5141 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 5142 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 5143 } 5144 5145 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5146 gfx_v9_0_enable_cp_power_gating(adev, true); 5147 else 5148 gfx_v9_0_enable_cp_power_gating(adev, false); 5149 5150 /* update gfx cgpg state */ 5151 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 5152 5153 /* update mgcg state */ 5154 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 5155 5156 if (enable) 5157 amdgpu_gfx_off_ctrl(adev, true); 5158 break; 5159 case CHIP_VEGA12: 5160 amdgpu_gfx_off_ctrl(adev, enable); 5161 break; 5162 default: 5163 break; 5164 } 5165 5166 return 0; 5167 } 5168 5169 static int gfx_v9_0_set_clockgating_state(void *handle, 5170 enum amd_clockgating_state state) 5171 { 5172 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5173 5174 if (amdgpu_sriov_vf(adev)) 5175 return 0; 5176 5177 switch (adev->asic_type) { 5178 case CHIP_VEGA10: 5179 case CHIP_VEGA12: 5180 case CHIP_VEGA20: 5181 case CHIP_RAVEN: 5182 case CHIP_ARCTURUS: 5183 case CHIP_RENOIR: 5184 case CHIP_ALDEBARAN: 5185 gfx_v9_0_update_gfx_clock_gating(adev, 5186 state == AMD_CG_STATE_GATE); 5187 break; 5188 default: 5189 break; 5190 } 5191 return 0; 5192 } 5193 5194 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 5195 { 5196 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5197 int data; 5198 5199 if (amdgpu_sriov_vf(adev)) 5200 *flags = 0; 5201 5202 /* AMD_CG_SUPPORT_GFX_MGCG */ 5203 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 5204 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5205 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5206 5207 /* AMD_CG_SUPPORT_GFX_CGCG */ 5208 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 5209 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5210 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5211 5212 /* AMD_CG_SUPPORT_GFX_CGLS */ 5213 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5214 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5215 5216 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5217 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 5218 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5219 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5220 5221 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5222 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 5223 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5224 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5225 5226 if (adev->asic_type != CHIP_ARCTURUS) { 5227 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5228 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 5229 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5230 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5231 5232 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5233 
if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5234 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5235 } 5236 } 5237 5238 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5239 { 5240 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 5241 } 5242 5243 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5244 { 5245 struct amdgpu_device *adev = ring->adev; 5246 u64 wptr; 5247 5248 /* XXX check if swapping is necessary on BE */ 5249 if (ring->use_doorbell) { 5250 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 5251 } else { 5252 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 5253 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 5254 } 5255 5256 return wptr; 5257 } 5258 5259 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5260 { 5261 struct amdgpu_device *adev = ring->adev; 5262 5263 if (ring->use_doorbell) { 5264 /* XXX check if swapping is necessary on BE */ 5265 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5266 WDOORBELL64(ring->doorbell_index, ring->wptr); 5267 } else { 5268 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 5269 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 5270 } 5271 } 5272 5273 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5274 { 5275 struct amdgpu_device *adev = ring->adev; 5276 u32 ref_and_mask, reg_mem_engine; 5277 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5278 5279 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5280 switch (ring->me) { 5281 case 1: 5282 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5283 break; 5284 case 2: 5285 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5286 break; 5287 default: 5288 return; 5289 } 5290 reg_mem_engine = 0; 5291 } else { 5292 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 5293 reg_mem_engine = 1; /* pfp */ 5294 } 5295 5296 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5297 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5298 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5299 ref_and_mask, ref_and_mask, 0x20); 5300 } 5301 5302 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5303 struct amdgpu_job *job, 5304 struct amdgpu_ib *ib, 5305 uint32_t flags) 5306 { 5307 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5308 u32 header, control = 0; 5309 5310 if (ib->flags & AMDGPU_IB_FLAG_CE) 5311 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5312 else 5313 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5314 5315 control |= ib->length_dw | (vmid << 24); 5316 5317 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5318 control |= INDIRECT_BUFFER_PRE_ENB(1); 5319 5320 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) 5321 gfx_v9_0_ring_emit_de_meta(ring); 5322 } 5323 5324 amdgpu_ring_write(ring, header); 5325 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5326 amdgpu_ring_write(ring, 5327 #ifdef __BIG_ENDIAN 5328 (2 << 0) | 5329 #endif 5330 lower_32_bits(ib->gpu_addr)); 5331 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5332 amdgpu_ring_write(ring, control); 5333 } 5334 5335 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5336 struct amdgpu_job *job, 5337 struct amdgpu_ib *ib, 5338 uint32_t flags) 5339 { 5340 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5341 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5342 5343 /* Currently, there is a high possibility to get wave ID mismatch 5344 * between ME and GDS, leading to a hw deadlock, because 
ME generates 5345 * different wave IDs than the GDS expects. This situation happens 5346 * randomly when at least 5 compute pipes use GDS ordered append. 5347 * The wave IDs generated by ME are also wrong after suspend/resume. 5348 * Those are probably bugs somewhere else in the kernel driver. 5349 * 5350 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5351 * GDS to 0 for this ring (me/pipe). 5352 */ 5353 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5354 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5355 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 5356 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5357 } 5358 5359 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5360 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5361 amdgpu_ring_write(ring, 5362 #ifdef __BIG_ENDIAN 5363 (2 << 0) | 5364 #endif 5365 lower_32_bits(ib->gpu_addr)); 5366 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5367 amdgpu_ring_write(ring, control); 5368 } 5369 5370 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5371 u64 seq, unsigned flags) 5372 { 5373 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5374 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5375 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5376 5377 /* RELEASE_MEM - flush caches, send int */ 5378 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5379 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 5380 EOP_TC_NC_ACTION_EN) : 5381 (EOP_TCL1_ACTION_EN | 5382 EOP_TC_ACTION_EN | 5383 EOP_TC_WB_ACTION_EN | 5384 EOP_TC_MD_ACTION_EN)) | 5385 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5386 EVENT_INDEX(5))); 5387 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 5388 5389 /* 5390 * the address should be Qword aligned if 64bit write, Dword 5391 * aligned if only send 32bit data low (discard data high) 5392 */ 5393 if (write64bit) 5394 BUG_ON(addr & 0x7); 5395 else 5396 BUG_ON(addr & 0x3); 5397 amdgpu_ring_write(ring, lower_32_bits(addr)); 5398 amdgpu_ring_write(ring, upper_32_bits(addr)); 5399 amdgpu_ring_write(ring, lower_32_bits(seq)); 5400 amdgpu_ring_write(ring, upper_32_bits(seq)); 5401 amdgpu_ring_write(ring, 0); 5402 } 5403 5404 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5405 { 5406 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5407 uint32_t seq = ring->fence_drv.sync_seq; 5408 uint64_t addr = ring->fence_drv.gpu_addr; 5409 5410 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5411 lower_32_bits(addr), upper_32_bits(addr), 5412 seq, 0xffffffff, 4); 5413 } 5414 5415 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5416 unsigned vmid, uint64_t pd_addr) 5417 { 5418 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5419 5420 /* compute doesn't have PFP */ 5421 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5422 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5423 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5424 amdgpu_ring_write(ring, 0x0); 5425 } 5426 } 5427 5428 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5429 { 5430 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 5431 } 5432 5433 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5434 { 5435 u64 wptr; 5436 5437 /* XXX check if swapping is necessary on BE */ 5438 if (ring->use_doorbell) 5439 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 5440 else 5441 BUG(); 5442 return wptr; 5443 } 5444 5445 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5446 { 5447 struct amdgpu_device *adev = ring->adev; 5448 5449 /* XXX check if swapping is necessary on BE */ 5450 if (ring->use_doorbell) { 5451 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5452 WDOORBELL64(ring->doorbell_index, ring->wptr); 5453 } else{ 5454 BUG(); /* only DOORBELL method supported on gfx9 now */ 5455 } 5456 } 5457 5458 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5459 u64 seq, unsigned int flags) 5460 { 5461 struct amdgpu_device *adev = ring->adev; 5462 5463 /* we only allocate 32bit for each seq wb address */ 5464 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5465 5466 /* write fence seq to the "addr" */ 5467 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5468 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5469 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5470 amdgpu_ring_write(ring, lower_32_bits(addr)); 5471 amdgpu_ring_write(ring, upper_32_bits(addr)); 5472 amdgpu_ring_write(ring, lower_32_bits(seq)); 5473 5474 if (flags & AMDGPU_FENCE_FLAG_INT) { 5475 /* set register to trigger INT */ 5476 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5477 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5478 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5479 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5480 amdgpu_ring_write(ring, 0); 5481 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5482 } 5483 } 5484 5485 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5486 { 5487 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5488 amdgpu_ring_write(ring, 0); 5489 } 5490 5491 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring 
*ring) 5492 { 5493 struct v9_ce_ib_state ce_payload = {0}; 5494 uint64_t csa_addr; 5495 int cnt; 5496 5497 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5498 csa_addr = amdgpu_csa_vaddr(ring->adev); 5499 5500 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5501 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5502 WRITE_DATA_DST_SEL(8) | 5503 WR_CONFIRM) | 5504 WRITE_DATA_CACHE_POLICY(0)); 5505 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5506 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5507 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 5508 } 5509 5510 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 5511 { 5512 struct v9_de_ib_state de_payload = {0}; 5513 uint64_t csa_addr, gds_addr; 5514 int cnt; 5515 5516 csa_addr = amdgpu_csa_vaddr(ring->adev); 5517 gds_addr = csa_addr + 4096; 5518 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 5519 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5520 5521 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5522 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5523 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5524 WRITE_DATA_DST_SEL(8) | 5525 WR_CONFIRM) | 5526 WRITE_DATA_CACHE_POLICY(0)); 5527 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5528 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5529 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 5530 } 5531 5532 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 5533 bool secure) 5534 { 5535 uint32_t v = secure ? FRAME_TMZ : 0; 5536 5537 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5538 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); 5539 } 5540 5541 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5542 { 5543 uint32_t dw2 = 0; 5544 5545 if (amdgpu_sriov_vf(ring->adev)) 5546 gfx_v9_0_ring_emit_ce_meta(ring); 5547 5548 dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */ 5549 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5550 /* set load_global_config & load_global_uconfig */ 5551 dw2 |= 0x8001; 5552 /* set load_cs_sh_regs */ 5553 dw2 |= 0x01000000; 5554 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5555 dw2 |= 0x10002; 5556 5557 /* set load_ce_ram if a preamble is present */ 5558 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5559 dw2 |= 0x10000000; 5560 } else { 5561 /* still load_ce_ram if this is the first time a preamble is presented, 5562 * even though no context switch happens.
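 * Only the load_ce_ram bit (0x10000000) is ORed into dw2 in that case.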
5563 */ 5564 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5565 dw2 |= 0x10000000; 5566 } 5567 5568 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5569 amdgpu_ring_write(ring, dw2); 5570 amdgpu_ring_write(ring, 0); 5571 } 5572 5573 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 5574 { 5575 unsigned ret; 5576 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5577 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 5578 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 5579 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 5580 ret = ring->wptr & ring->buf_mask; 5581 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 5582 return ret; 5583 } 5584 5585 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 5586 { 5587 unsigned cur; 5588 BUG_ON(offset > ring->buf_mask); 5589 BUG_ON(ring->ring[offset] != 0x55aa55aa); 5590 5591 cur = (ring->wptr & ring->buf_mask) - 1; 5592 if (likely(cur > offset)) 5593 ring->ring[offset] = cur - offset; 5594 else 5595 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 5596 } 5597 5598 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 5599 uint32_t reg_val_offs) 5600 { 5601 struct amdgpu_device *adev = ring->adev; 5602 5603 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5604 amdgpu_ring_write(ring, 0 | /* src: register*/ 5605 (5 << 8) | /* dst: memory */ 5606 (1 << 20)); /* write confirm */ 5607 amdgpu_ring_write(ring, reg); 5608 amdgpu_ring_write(ring, 0); 5609 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5610 reg_val_offs * 4)); 5611 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5612 reg_val_offs * 4)); 5613 } 5614 5615 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5616 uint32_t val) 5617 { 5618 uint32_t cmd = 0; 5619 5620 switch (ring->funcs->type) { 5621 case AMDGPU_RING_TYPE_GFX: 5622 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5623 break; 5624 case AMDGPU_RING_TYPE_KIQ: 5625 cmd = (1 << 16); /* no inc addr */ 5626 break; 5627 default: 5628 cmd = WR_CONFIRM; 5629 break; 5630 } 5631 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5632 amdgpu_ring_write(ring, cmd); 5633 amdgpu_ring_write(ring, reg); 5634 amdgpu_ring_write(ring, 0); 5635 amdgpu_ring_write(ring, val); 5636 } 5637 5638 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5639 uint32_t val, uint32_t mask) 5640 { 5641 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5642 } 5643 5644 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5645 uint32_t reg0, uint32_t reg1, 5646 uint32_t ref, uint32_t mask) 5647 { 5648 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5649 struct amdgpu_device *adev = ring->adev; 5650 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 
5651 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5652 5653 if (fw_version_ok) 5654 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5655 ref, mask, 0x20); 5656 else 5657 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5658 ref, mask); 5659 } 5660 5661 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5662 { 5663 struct amdgpu_device *adev = ring->adev; 5664 uint32_t value = 0; 5665 5666 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5667 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5668 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5669 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5670 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5671 } 5672 5673 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5674 enum amdgpu_interrupt_state state) 5675 { 5676 switch (state) { 5677 case AMDGPU_IRQ_STATE_DISABLE: 5678 case AMDGPU_IRQ_STATE_ENABLE: 5679 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5680 TIME_STAMP_INT_ENABLE, 5681 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5682 break; 5683 default: 5684 break; 5685 } 5686 } 5687 5688 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5689 int me, int pipe, 5690 enum amdgpu_interrupt_state state) 5691 { 5692 u32 mec_int_cntl, mec_int_cntl_reg; 5693 5694 /* 5695 * amdgpu controls only the first MEC. That's why this function only 5696 * handles the setting of interrupts for this specific MEC. All other 5697 * pipes' interrupts are set by amdkfd. 5698 */ 5699 5700 if (me == 1) { 5701 switch (pipe) { 5702 case 0: 5703 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5704 break; 5705 case 1: 5706 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5707 break; 5708 case 2: 5709 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5710 break; 5711 case 3: 5712 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5713 break; 5714 default: 5715 DRM_DEBUG("invalid pipe %d\n", pipe); 5716 return; 5717 } 5718 } else { 5719 DRM_DEBUG("invalid me %d\n", me); 5720 return; 5721 } 5722 5723 switch (state) { 5724 case AMDGPU_IRQ_STATE_DISABLE: 5725 mec_int_cntl = RREG32(mec_int_cntl_reg); 5726 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5727 TIME_STAMP_INT_ENABLE, 0); 5728 WREG32(mec_int_cntl_reg, mec_int_cntl); 5729 break; 5730 case AMDGPU_IRQ_STATE_ENABLE: 5731 mec_int_cntl = RREG32(mec_int_cntl_reg); 5732 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5733 TIME_STAMP_INT_ENABLE, 1); 5734 WREG32(mec_int_cntl_reg, mec_int_cntl); 5735 break; 5736 default: 5737 break; 5738 } 5739 } 5740 5741 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5742 struct amdgpu_irq_src *source, 5743 unsigned type, 5744 enum amdgpu_interrupt_state state) 5745 { 5746 switch (state) { 5747 case AMDGPU_IRQ_STATE_DISABLE: 5748 case AMDGPU_IRQ_STATE_ENABLE: 5749 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5750 PRIV_REG_INT_ENABLE, 5751 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5752 break; 5753 default: 5754 break; 5755 } 5756 5757 return 0; 5758 } 5759 5760 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5761 struct amdgpu_irq_src *source, 5762 unsigned type, 5763 enum amdgpu_interrupt_state state) 5764 { 5765 switch (state) { 5766 case AMDGPU_IRQ_STATE_DISABLE: 5767 case AMDGPU_IRQ_STATE_ENABLE: 5768 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5769 PRIV_INSTR_INT_ENABLE, 5770 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 5771 break; 5772 default: 5773 break; 5774 } 5775 5776 return 0; 5777 } 5778 5779 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 5780 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5781 CP_ECC_ERROR_INT_ENABLE, 1) 5782 5783 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 5784 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5785 CP_ECC_ERROR_INT_ENABLE, 0) 5786 5787 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 5788 struct amdgpu_irq_src *source, 5789 unsigned type, 5790 enum amdgpu_interrupt_state state) 5791 { 5792 switch (state) { 5793 case AMDGPU_IRQ_STATE_DISABLE: 5794 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5795 CP_ECC_ERROR_INT_ENABLE, 0); 5796 DISABLE_ECC_ON_ME_PIPE(1, 0); 5797 DISABLE_ECC_ON_ME_PIPE(1, 1); 5798 DISABLE_ECC_ON_ME_PIPE(1, 2); 5799 DISABLE_ECC_ON_ME_PIPE(1, 3); 5800 break; 5801 5802 case AMDGPU_IRQ_STATE_ENABLE: 5803 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5804 CP_ECC_ERROR_INT_ENABLE, 1); 5805 ENABLE_ECC_ON_ME_PIPE(1, 0); 5806 ENABLE_ECC_ON_ME_PIPE(1, 1); 5807 ENABLE_ECC_ON_ME_PIPE(1, 2); 5808 ENABLE_ECC_ON_ME_PIPE(1, 3); 5809 break; 5810 default: 5811 break; 5812 } 5813 5814 return 0; 5815 } 5816 5817 5818 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5819 struct amdgpu_irq_src *src, 5820 unsigned type, 5821 enum amdgpu_interrupt_state state) 5822 { 5823 switch (type) { 5824 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5825 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5826 break; 5827 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5828 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5829 break; 5830 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5831 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5832 break; 5833 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5834 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5835 break; 5836 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5837 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5838 break; 5839 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5840 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5841 break; 5842 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5843 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5844 break; 5845 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5846 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5847 break; 5848 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5849 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5850 break; 5851 default: 5852 break; 5853 } 5854 return 0; 5855 } 5856 5857 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5858 struct amdgpu_irq_src *source, 5859 struct amdgpu_iv_entry *entry) 5860 { 5861 int i; 5862 u8 me_id, pipe_id, queue_id; 5863 struct amdgpu_ring *ring; 5864 5865 DRM_DEBUG("IH: CP EOP\n"); 5866 me_id = (entry->ring_id & 0x0c) >> 2; 5867 pipe_id = (entry->ring_id & 0x03) >> 0; 5868 queue_id = (entry->ring_id & 0x70) >> 4; 5869 5870 switch (me_id) { 5871 case 0: 5872 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5873 break; 5874 case 1: 5875 case 2: 5876 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5877 ring = &adev->gfx.compute_ring[i]; 5878 /* Per-queue interrupt is supported for MEC starting from VI. 5879 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
5880 */ 5881 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 5882 amdgpu_fence_process(ring); 5883 } 5884 break; 5885 } 5886 return 0; 5887 } 5888 5889 static void gfx_v9_0_fault(struct amdgpu_device *adev, 5890 struct amdgpu_iv_entry *entry) 5891 { 5892 u8 me_id, pipe_id, queue_id; 5893 struct amdgpu_ring *ring; 5894 int i; 5895 5896 me_id = (entry->ring_id & 0x0c) >> 2; 5897 pipe_id = (entry->ring_id & 0x03) >> 0; 5898 queue_id = (entry->ring_id & 0x70) >> 4; 5899 5900 switch (me_id) { 5901 case 0: 5902 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 5903 break; 5904 case 1: 5905 case 2: 5906 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5907 ring = &adev->gfx.compute_ring[i]; 5908 if (ring->me == me_id && ring->pipe == pipe_id && 5909 ring->queue == queue_id) 5910 drm_sched_fault(&ring->sched); 5911 } 5912 break; 5913 } 5914 } 5915 5916 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 5917 struct amdgpu_irq_src *source, 5918 struct amdgpu_iv_entry *entry) 5919 { 5920 DRM_ERROR("Illegal register access in command stream\n"); 5921 gfx_v9_0_fault(adev, entry); 5922 return 0; 5923 } 5924 5925 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 5926 struct amdgpu_irq_src *source, 5927 struct amdgpu_iv_entry *entry) 5928 { 5929 DRM_ERROR("Illegal instruction in command stream\n"); 5930 gfx_v9_0_fault(adev, entry); 5931 return 0; 5932 } 5933 5934 5935 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { 5936 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 5937 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 5938 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) 5939 }, 5940 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 5941 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), 5942 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) 5943 }, 5944 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 5945 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), 5946 0, 0 5947 }, 5948 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 5949 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), 5950 0, 0 5951 }, 5952 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 5953 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), 5954 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) 5955 }, 5956 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 5957 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), 5958 0, 0 5959 }, 5960 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 5961 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 5962 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) 5963 }, 5964 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 5965 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), 5966 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) 5967 }, 5968 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 5969 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), 5970 0, 0 5971 }, 5972 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 5973 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), 5974 0, 0 5975 }, 5976 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 5977 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), 5978 0, 0 5979 }, 5980 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 5981 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), 5982 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) 5983 }, 5984 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 5985 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 5986 0, 0 5987 }, 5988 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, 
mmGDS_EDC_OA_PHY_CNT), 5989 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 5990 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) 5991 }, 5992 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 5993 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 5994 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 5995 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) 5996 }, 5997 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 5998 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 5999 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 6000 0, 0 6001 }, 6002 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 6003 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6004 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 6005 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) 6006 }, 6007 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 6008 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6009 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 6010 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) 6011 }, 6012 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 6013 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6014 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 6015 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) 6016 }, 6017 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 6018 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6019 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 6020 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) 6021 }, 6022 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 6023 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 6024 0, 0 6025 }, 6026 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6027 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 6028 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) 6029 }, 6030 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6031 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 6032 0, 0 6033 }, 6034 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6035 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 6036 0, 0 6037 }, 6038 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6039 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 6040 0, 0 6041 }, 6042 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6043 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 6044 0, 0 6045 }, 6046 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6047 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 6048 0, 0 6049 }, 6050 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6051 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 6052 0, 0 6053 }, 6054 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6055 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 6056 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) 6057 }, 6058 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6059 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 6060 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) 6061 }, 6062 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6063 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 6064 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) 6065 }, 6066 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6067 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 6068 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) 6069 }, 6070 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6071 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 6072 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) 6073 }, 6074 
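/* Several of the TCC and TCI FIFO counters that follow are SED-only: they report
 * a single-error count and leave the DED field of the entry as 0, 0.
 */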
{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6075 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 6076 0, 0 6077 }, 6078 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6079 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 6080 0, 0 6081 }, 6082 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6083 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 6084 0, 0 6085 }, 6086 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6087 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 6088 0, 0 6089 }, 6090 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6091 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 6092 0, 0 6093 }, 6094 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6095 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 6096 0, 0 6097 }, 6098 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6099 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 6100 0, 0 6101 }, 6102 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6103 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 6104 0, 0 6105 }, 6106 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6107 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 6108 0, 0 6109 }, 6110 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6111 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 6112 0, 0 6113 }, 6114 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6115 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 6116 0, 0 6117 }, 6118 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6119 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 6120 0, 0 6121 }, 6122 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6123 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 6124 0, 0 6125 }, 6126 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 6127 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 6128 0, 0 6129 }, 6130 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6131 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 6132 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) 6133 }, 6134 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6135 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 6136 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) 6137 }, 6138 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6139 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 6140 0, 0 6141 }, 6142 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6143 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 6144 0, 0 6145 }, 6146 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6147 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 6148 0, 0 6149 }, 6150 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6151 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 6152 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) 6153 }, 6154 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6155 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 6156 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) 6157 }, 6158 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6159 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 6160 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) 6161 }, 6162 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, 
mmTD_EDC_CNT), 6163 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 6164 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) 6165 }, 6166 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6167 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), 6168 0, 0 6169 }, 6170 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6171 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), 6172 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) 6173 }, 6174 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6175 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), 6176 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) 6177 }, 6178 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6179 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), 6180 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) 6181 }, 6182 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6183 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), 6184 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) 6185 }, 6186 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6187 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), 6188 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) 6189 }, 6190 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6191 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), 6192 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) 6193 }, 6194 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6195 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), 6196 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) 6197 }, 6198 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6199 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 6200 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) 6201 }, 6202 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6203 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 6204 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) 6205 }, 6206 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6207 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 6208 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) 6209 }, 6210 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6211 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 6212 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) 6213 }, 6214 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6215 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 6216 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) 6217 }, 6218 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6219 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 6220 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) 6221 }, 6222 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6223 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 6224 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) 6225 }, 6226 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6227 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 6228 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) 6229 }, 6230 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6231 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 6232 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) 6233 }, 6234 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6235 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 6236 SOC15_REG_FIELD(SQC_EDC_CNT2, 
			  DATA_BANKA_BANK_RAM_DED_COUNT)
	},
	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
	  0, 0
	},
	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
	},
	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
	},
	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
	},
	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
	},
	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
	},
	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
	  0, 0
	},
	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
	  0, 0
	},
	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
	},
	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
	},
	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
	},
	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
	},
	{ "EA_WRET_TAGMEM",
	  SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
	},
	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
	  0, 0
	},
	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
	},
	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
	},
	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
	},
	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
	  0, 0
	}
};

static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if)
{
	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
	int ret;
	struct ta_ras_trigger_error_input block_info = { 0 };

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return -EINVAL;

	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
		return -EINVAL;

	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
		return -EPERM;

	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
	      info->head.type)) {
		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
			  ras_gfx_subblocks[info->head.sub_block_index].name,
			  info->head.type);
		return -EPERM;
	}

	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
	      info->head.type)) {
		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
			  ras_gfx_subblocks[info->head.sub_block_index].name,
			  info->head.type);
		return -EPERM;
	}

	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
	block_info.sub_block_index =
		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
	block_info.address = info->address;
	block_info.value = info->value;

	mutex_lock(&adev->grbm_idx_mutex);
	ret = psp_ras_trigger_error(&adev->psp, &block_info);
	mutex_unlock(&adev->grbm_idx_mutex);

	return ret;
}

static const char *vml2_mems[] = {
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
};

static const char *vml2_walker_mems[] = {
	"UTC_VML2_CACHE_PDE0_MEM0",
	"UTC_VML2_CACHE_PDE0_MEM1",
	"UTC_VML2_CACHE_PDE1_MEM0",
	"UTC_VML2_CACHE_PDE1_MEM1",
	"UTC_VML2_CACHE_PDE2_MEM0",
	"UTC_VML2_CACHE_PDE2_MEM1",
	"UTC_VML2_RDIF_LOG_FIFO",
};

static const char *atc_l2_cache_2m_mems[] = {
	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
};

static const char *atc_l2_cache_4k_mems[] = {
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
};

static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
					 struct ras_err_data *err_data)
{
	uint32_t i, data;
	uint32_t sec_count, ded_count;

	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
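	/* The ATC L2 4K-cache counters are cleared the same way just below;
	 * the per-instance loops that follow then select each memory through
	 * its *_INDEX register and accumulate SEC/DED counts into err_data.
	 */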
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);

	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);

		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
		if (sec_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"SEC %d\n", i, vml2_mems[i], sec_count);
			err_data->ce_count += sec_count;
		}

		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
		if (ded_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"DED %d\n", i, vml2_mems[i], ded_count);
			err_data->ue_count += ded_count;
		}
	}

	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);

		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
						SEC_COUNT);
		if (sec_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
			err_data->ce_count += sec_count;
		}

		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
						DED_COUNT);
		if (ded_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"DED %d\n", i, vml2_walker_mems[i], ded_count);
			err_data->ue_count += ded_count;
		}
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);

		sec_count = (data & 0x00006000L) >> 0xd;
		if (sec_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
				sec_count);
			err_data->ce_count += sec_count;
		}
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);

		sec_count = (data & 0x00006000L) >> 0xd;
		if (sec_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
				sec_count);
			err_data->ce_count += sec_count;
		}

		ded_count = (data & 0x00018000L) >> 0xf;
		if (ded_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"DED %d\n", i, atc_l2_cache_4k_mems[i],
				ded_count);
			err_data->ue_count += ded_count;
		}
	}

	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);

	return 0;
}

static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
				    const struct soc15_reg_entry *reg,
				    uint32_t se_id, uint32_t inst_id,
				    uint32_t value,
				    uint32_t *sec_count, uint32_t *ded_count)
{
	uint32_t i;
	uint32_t sec_cnt, ded_cnt;

	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
		    gfx_v9_0_ras_fields[i].seg != reg->seg ||
		    gfx_v9_0_ras_fields[i].inst != reg->inst)
			continue;

		sec_cnt = (value &
				gfx_v9_0_ras_fields[i].sec_count_mask) >>
				gfx_v9_0_ras_fields[i].sec_count_shift;
		if (sec_cnt) {
			dev_info(adev->dev, "GFX SubBlock %s, "
				"Instance[%d][%d], "
SEC %d\n", 6610 gfx_v9_0_ras_fields[i].name, 6611 se_id, inst_id, 6612 sec_cnt); 6613 *sec_count += sec_cnt; 6614 } 6615 6616 ded_cnt = (value & 6617 gfx_v9_0_ras_fields[i].ded_count_mask) >> 6618 gfx_v9_0_ras_fields[i].ded_count_shift; 6619 if (ded_cnt) { 6620 dev_info(adev->dev, "GFX SubBlock %s, " 6621 "Instance[%d][%d], DED %d\n", 6622 gfx_v9_0_ras_fields[i].name, 6623 se_id, inst_id, 6624 ded_cnt); 6625 *ded_count += ded_cnt; 6626 } 6627 } 6628 6629 return 0; 6630 } 6631 6632 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) 6633 { 6634 int i, j, k; 6635 6636 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6637 return; 6638 6639 /* read back registers to clear the counters */ 6640 mutex_lock(&adev->grbm_idx_mutex); 6641 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 6642 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 6643 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 6644 gfx_v9_0_select_se_sh(adev, j, 0x0, k); 6645 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 6646 } 6647 } 6648 } 6649 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 6650 mutex_unlock(&adev->grbm_idx_mutex); 6651 6652 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6653 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6654 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6655 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6656 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6657 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6658 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6659 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6660 6661 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6662 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6663 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6664 } 6665 6666 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6667 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6668 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6669 } 6670 6671 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6672 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6673 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6674 } 6675 6676 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6677 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6678 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6679 } 6680 6681 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6682 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6683 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6684 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6685 } 6686 6687 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 6688 void *ras_error_status) 6689 { 6690 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 6691 uint32_t sec_count = 0, ded_count = 0; 6692 uint32_t i, j, k; 6693 uint32_t reg_value; 6694 6695 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6696 return -EINVAL; 6697 6698 err_data->ue_count = 0; 6699 err_data->ce_count = 0; 6700 6701 mutex_lock(&adev->grbm_idx_mutex); 6702 6703 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 6704 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 6705 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 6706 gfx_v9_0_select_se_sh(adev, j, 0, k); 6707 reg_value = 6708 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 6709 if (reg_value) 6710 gfx_v9_0_ras_error_count(adev, 6711 &gfx_v9_0_edc_counter_regs[i], 6712 j, k, reg_value, 6713 
&sec_count, &ded_count); 6714 } 6715 } 6716 } 6717 6718 err_data->ce_count += sec_count; 6719 err_data->ue_count += ded_count; 6720 6721 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 6722 mutex_unlock(&adev->grbm_idx_mutex); 6723 6724 gfx_v9_0_query_utc_edc_status(adev, err_data); 6725 6726 return 0; 6727 } 6728 6729 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring) 6730 { 6731 const unsigned int cp_coher_cntl = 6732 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) | 6733 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) | 6734 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) | 6735 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) | 6736 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1); 6737 6738 /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */ 6739 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); 6740 amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */ 6741 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6742 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 6743 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6744 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6745 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 6746 } 6747 6748 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring, 6749 uint32_t pipe, bool enable) 6750 { 6751 struct amdgpu_device *adev = ring->adev; 6752 uint32_t val; 6753 uint32_t wcl_cs_reg; 6754 6755 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */ 6756 val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT; 6757 6758 switch (pipe) { 6759 case 0: 6760 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0); 6761 break; 6762 case 1: 6763 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1); 6764 break; 6765 case 2: 6766 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2); 6767 break; 6768 case 3: 6769 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3); 6770 break; 6771 default: 6772 DRM_DEBUG("invalid pipe %d\n", pipe); 6773 return; 6774 } 6775 6776 amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val); 6777 6778 } 6779 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) 6780 { 6781 struct amdgpu_device *adev = ring->adev; 6782 uint32_t val; 6783 int i; 6784 6785 6786 /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit 6787 * number of gfx waves. Setting 5 bit will make sure gfx only gets 6788 * around 25% of gpu resources. 6789 */ 6790 val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT; 6791 amdgpu_ring_emit_wreg(ring, 6792 SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX), 6793 val); 6794 6795 /* Restrict waves for normal/low priority compute queues as well 6796 * to get best QoS for high priority compute jobs. 6797 * 6798 * amdgpu controls only 1st ME(0-3 CS pipes). 
	 */
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		if (i != ring->pipe)
			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);

	}
}

static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
		       prior to this double SWITCH_BUFFER */
		5 + /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2 + /* SWITCH_BUFFER */
		7,  /* gfx_v9_0_emit_mem_sync */
	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /*
		       gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
		7 + /* gfx_v9_0_emit_mem_sync */
		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = amdgpu_gfx_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
	case CHIP_ALDEBARAN:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
		adev->gds.gds_size = 0x1000;
		break;
	case CHIP_ALDEBARAN:
		/* aldebaran removed all the GDS internal memory; only GWS
		 * opcodes such as barrier and semaphore are supported in
		 * the kernel.
		 */
		adev->gds.gds_size = 0;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case CHIP_ARCTURUS:
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	case CHIP_ALDEBARAN:
		/* deprecated for Aldebaran, no usage at all */
		adev->gds.gds_compute_max_wave_id = 0;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask
		= 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 comes from the bitmap array size 4*4, and it can cover all gfx9 ASICs
	 */
	if (adev->gfx.config.max_shader_engines *
		adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
			 * is a 4x4 array, which is usually suitable for Vega
			 * ASICs with their 4*2 SE/SH layout.
			 * But for Arcturus, the SE/SH layout changed to 8*1.
			 * To mostly reduce the impact, we make it compatible
			 * with the current bitmap array as below:
			 *    SE4,SH0 --> bitmap[0][1]
			 *    SE5,SH0 --> bitmap[1][1]
			 *    SE6,SH0 --> bitmap[2][1]
			 *    SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[i % 4][j + i / 4] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};
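/*
 * For reference, a minimal sketch of how this block version is consumed,
 * assuming the usual SOC15 wiring: the SoC setup code registers it while
 * building a device's IP list, e.g.
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 *
 * as done for Vega/Raven-class parts in soc15.c.
 */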