1 /* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/delay.h> 25 #include <linux/kernel.h> 26 #include <linux/firmware.h> 27 #include <linux/module.h> 28 #include <linux/pci.h> 29 30 #include "amdgpu.h" 31 #include "amdgpu_gfx.h" 32 #include "soc15.h" 33 #include "soc15d.h" 34 #include "amdgpu_atomfirmware.h" 35 #include "amdgpu_pm.h" 36 37 #include "gc/gc_9_0_offset.h" 38 #include "gc/gc_9_0_sh_mask.h" 39 40 #include "vega10_enum.h" 41 42 #include "soc15_common.h" 43 #include "clearstate_gfx9.h" 44 #include "v9_structs.h" 45 46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" 47 48 #include "amdgpu_ras.h" 49 50 #include "gfx_v9_4.h" 51 #include "gfx_v9_0.h" 52 #include "gfx_v9_4_2.h" 53 54 #include "asic_reg/pwr/pwr_10_0_offset.h" 55 #include "asic_reg/pwr/pwr_10_0_sh_mask.h" 56 #include "asic_reg/gc/gc_9_0_default.h" 57 58 #define GFX9_NUM_GFX_RINGS 1 59 #define GFX9_MEC_HPD_SIZE 4096 60 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 61 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L 62 63 #define mmGCEA_PROBE_MAP 0x070c 64 #define mmGCEA_PROBE_MAP_BASE_IDX 0 65 66 #define GFX9_RLCG_GC_WRITE_OLD (0x8 << 28) 67 #define GFX9_RLCG_GC_WRITE (0x0 << 28) 68 #define GFX9_RLCG_GC_READ (0x1 << 28) 69 #define GFX9_RLCG_VFGATE_DISABLED 0x4000000 70 #define GFX9_RLCG_WRONG_OPERATION_TYPE 0x2000000 71 #define GFX9_RLCG_NOT_IN_RANGE 0x1000000 72 73 MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); 74 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); 75 MODULE_FIRMWARE("amdgpu/vega10_me.bin"); 76 MODULE_FIRMWARE("amdgpu/vega10_mec.bin"); 77 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); 78 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); 79 80 MODULE_FIRMWARE("amdgpu/vega12_ce.bin"); 81 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin"); 82 MODULE_FIRMWARE("amdgpu/vega12_me.bin"); 83 MODULE_FIRMWARE("amdgpu/vega12_mec.bin"); 84 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin"); 85 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin"); 86 87 MODULE_FIRMWARE("amdgpu/vega20_ce.bin"); 88 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin"); 89 MODULE_FIRMWARE("amdgpu/vega20_me.bin"); 90 MODULE_FIRMWARE("amdgpu/vega20_mec.bin"); 91 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin"); 92 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin"); 93 94 MODULE_FIRMWARE("amdgpu/raven_ce.bin"); 95 MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); 96 MODULE_FIRMWARE("amdgpu/raven_me.bin"); 97 MODULE_FIRMWARE("amdgpu/raven_mec.bin"); 98 MODULE_FIRMWARE("amdgpu/raven_mec2.bin"); 99 
MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); 100 101 MODULE_FIRMWARE("amdgpu/picasso_ce.bin"); 102 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin"); 103 MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 104 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 105 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 106 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 107 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 108 109 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 110 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 111 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 112 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 113 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 114 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 115 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 116 117 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); 118 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); 119 120 MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); 121 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); 122 MODULE_FIRMWARE("amdgpu/renoir_me.bin"); 123 MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); 124 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); 125 126 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin"); 127 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin"); 128 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin"); 129 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin"); 130 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin"); 131 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin"); 132 133 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin"); 134 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin"); 135 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin"); 136 137 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 138 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 139 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04 140 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 141 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09 142 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 143 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a 144 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 145 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b 146 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 147 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c 148 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 149 150 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir 0x0025 151 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX 1 152 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026 153 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1 154 155 enum ta_ras_gfx_subblock { 156 /*CPC*/ 157 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 158 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, 159 TA_RAS_BLOCK__GFX_CPC_UCODE, 160 TA_RAS_BLOCK__GFX_DC_STATE_ME1, 161 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 162 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, 163 TA_RAS_BLOCK__GFX_DC_STATE_ME2, 164 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 165 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 166 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 167 /* CPF*/ 168 TA_RAS_BLOCK__GFX_CPF_INDEX_START, 169 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, 170 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, 171 TA_RAS_BLOCK__GFX_CPF_TAG, 172 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, 173 /* CPG*/ 174 TA_RAS_BLOCK__GFX_CPG_INDEX_START, 175 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, 176 TA_RAS_BLOCK__GFX_CPG_DMA_TAG, 177 TA_RAS_BLOCK__GFX_CPG_TAG, 178 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, 179 /* GDS*/ 180 TA_RAS_BLOCK__GFX_GDS_INDEX_START, 181 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, 182 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 183 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 184 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 185 
TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 186 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 187 /* SPI*/ 188 TA_RAS_BLOCK__GFX_SPI_SR_MEM, 189 /* SQ*/ 190 TA_RAS_BLOCK__GFX_SQ_INDEX_START, 191 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, 192 TA_RAS_BLOCK__GFX_SQ_LDS_D, 193 TA_RAS_BLOCK__GFX_SQ_LDS_I, 194 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ 195 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, 196 /* SQC (3 ranges)*/ 197 TA_RAS_BLOCK__GFX_SQC_INDEX_START, 198 /* SQC range 0*/ 199 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, 200 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = 201 TA_RAS_BLOCK__GFX_SQC_INDEX0_START, 202 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 203 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, 204 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 205 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, 206 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 207 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 208 TA_RAS_BLOCK__GFX_SQC_INDEX0_END = 209 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 210 /* SQC range 1*/ 211 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 212 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = 213 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 214 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 215 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, 216 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, 217 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, 218 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, 219 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, 220 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 221 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 222 TA_RAS_BLOCK__GFX_SQC_INDEX1_END = 223 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 224 /* SQC range 2*/ 225 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 226 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = 227 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 228 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 229 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, 230 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, 231 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, 232 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, 233 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, 234 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 235 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 236 TA_RAS_BLOCK__GFX_SQC_INDEX2_END = 237 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 238 TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END, 239 /* TA*/ 240 TA_RAS_BLOCK__GFX_TA_INDEX_START, 241 TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START, 242 TA_RAS_BLOCK__GFX_TA_FS_AFIFO, 243 TA_RAS_BLOCK__GFX_TA_FL_LFIFO, 244 TA_RAS_BLOCK__GFX_TA_FX_LFIFO, 245 TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 246 TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 247 /* TCA*/ 248 TA_RAS_BLOCK__GFX_TCA_INDEX_START, 249 TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START, 250 TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 251 TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 252 /* TCC (5 sub-ranges)*/ 253 TA_RAS_BLOCK__GFX_TCC_INDEX_START, 254 /* TCC range 0*/ 255 TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START, 256 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START, 257 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, 258 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, 259 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, 260 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, 261 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, 262 TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, 263 TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 264 TA_RAS_BLOCK__GFX_TCC_INDEX0_END = 
TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 265 /* TCC range 1*/ 266 TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 267 TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 268 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 269 TA_RAS_BLOCK__GFX_TCC_INDEX1_END = 270 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 271 /* TCC range 2*/ 272 TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 273 TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 274 TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, 275 TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, 276 TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN, 277 TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, 278 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO, 279 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, 280 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 281 TA_RAS_BLOCK__GFX_TCC_INDEX2_END = 282 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 283 /* TCC range 3*/ 284 TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 285 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 286 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 287 TA_RAS_BLOCK__GFX_TCC_INDEX3_END = 288 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 289 /* TCC range 4*/ 290 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 291 TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = 292 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 293 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 294 TA_RAS_BLOCK__GFX_TCC_INDEX4_END = 295 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 296 TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END, 297 /* TCI*/ 298 TA_RAS_BLOCK__GFX_TCI_WRITE_RAM, 299 /* TCP*/ 300 TA_RAS_BLOCK__GFX_TCP_INDEX_START, 301 TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START, 302 TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM, 303 TA_RAS_BLOCK__GFX_TCP_CMD_FIFO, 304 TA_RAS_BLOCK__GFX_TCP_VM_FIFO, 305 TA_RAS_BLOCK__GFX_TCP_DB_RAM, 306 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, 307 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 308 TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 309 /* TD*/ 310 TA_RAS_BLOCK__GFX_TD_INDEX_START, 311 TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START, 312 TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI, 313 TA_RAS_BLOCK__GFX_TD_CS_FIFO, 314 TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO, 315 /* EA (3 sub-ranges)*/ 316 TA_RAS_BLOCK__GFX_EA_INDEX_START, 317 /* EA range 0*/ 318 TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START, 319 TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START, 320 TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, 321 TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, 322 TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM, 323 TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM, 324 TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, 325 TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, 326 TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 327 TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 328 /* EA range 1*/ 329 TA_RAS_BLOCK__GFX_EA_INDEX1_START, 330 TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START, 331 TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, 332 TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM, 333 TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, 334 TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, 335 TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, 336 TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 337 TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 338 /* EA range 2*/ 339 TA_RAS_BLOCK__GFX_EA_INDEX2_START, 340 TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START, 341 TA_RAS_BLOCK__GFX_EA_MAM_D1MEM, 342 TA_RAS_BLOCK__GFX_EA_MAM_D2MEM, 343 TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 344 TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 345 
TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END, 346 /* UTC VM L2 bank*/ 347 TA_RAS_BLOCK__UTC_VML2_BANK_CACHE, 348 /* UTC VM walker*/ 349 TA_RAS_BLOCK__UTC_VML2_WALKER, 350 /* UTC ATC L2 2MB cache*/ 351 TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, 352 /* UTC ATC L2 4KB cache*/ 353 TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, 354 TA_RAS_BLOCK__GFX_MAX 355 }; 356 357 struct ras_gfx_subblock { 358 unsigned char *name; 359 int ta_subblock; 360 int hw_supported_error_type; 361 int sw_supported_error_type; 362 }; 363 364 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \ 365 [AMDGPU_RAS_BLOCK__##subblock] = { \ 366 #subblock, \ 367 TA_RAS_BLOCK__##subblock, \ 368 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \ 369 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \ 370 } 371 372 static const struct ras_gfx_subblock ras_gfx_subblocks[] = { 373 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), 374 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1), 375 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 376 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 377 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 378 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 379 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 380 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 381 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 382 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 383 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1), 384 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0), 385 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1), 386 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1), 387 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 388 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0), 389 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, 390 0), 391 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, 392 0), 393 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 394 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0), 395 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0), 396 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1), 397 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0), 398 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0), 399 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1), 400 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 401 0, 0), 402 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 403 0), 404 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 405 0, 0), 406 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, 407 0), 408 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 409 0, 0), 410 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 411 0), 412 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 413 1), 414 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 415 0, 0, 0), 416 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 417 0), 418 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 419 0), 420 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 421 0), 422 
AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 423 0), 424 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 425 0), 426 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 427 0, 0), 428 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 429 0), 430 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 431 0), 432 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 433 0, 0, 0), 434 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 435 0), 436 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 437 0), 438 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 439 0), 440 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 441 0), 442 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 443 0), 444 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 445 0, 0), 446 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 447 0), 448 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1), 449 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 450 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 451 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 452 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 453 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0), 454 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 455 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1), 456 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, 457 1), 458 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, 459 1), 460 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, 461 1), 462 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, 463 0), 464 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, 465 0), 466 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 467 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 468 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0), 469 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0), 470 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0), 471 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0), 472 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 473 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0), 474 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0), 475 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 476 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0), 477 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, 478 0), 479 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 480 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, 481 0), 482 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, 483 0, 0), 484 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, 485 0), 486 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 487 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1), 488 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0), 489 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 490 
AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 491 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 492 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0), 493 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0), 494 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1), 495 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0), 496 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 497 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1), 498 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 499 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), 500 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 501 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 502 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 503 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 504 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), 505 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 506 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 507 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 508 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 509 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0), 510 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 511 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 512 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0), 513 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0), 514 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0), 515 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0), 516 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0), 517 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0), 518 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0), 519 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0), 520 }; 521 522 static const struct soc15_reg_golden golden_settings_gc_9_0[] = 523 { 524 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 525 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), 526 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 527 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 528 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 529 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 530 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 531 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 532 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 533 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87), 534 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f), 535 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 536 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 537 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 538 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 539 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 540 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 
0x000003ff), 541 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 542 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 543 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 544 }; 545 546 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = 547 { 548 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), 549 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 550 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 551 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 552 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 553 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 554 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), 555 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 556 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), 557 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 558 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 559 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 560 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 561 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 562 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 563 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), 564 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), 565 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) 566 }; 567 568 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = 569 { 570 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080), 571 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 572 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 573 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042), 574 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042), 575 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400), 576 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000), 577 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000), 578 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107), 579 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000), 580 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000) 581 }; 582 583 static const struct soc15_reg_golden golden_settings_gc_9_1[] = 584 { 585 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 586 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 587 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 588 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 589 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 590 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 591 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 592 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 593 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 594 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 595 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 596 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 597 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 598 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 599 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 600 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 601 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 602 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 603 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 604 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), 605 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), 606 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 607 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 608 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 609 }; 610 611 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = 612 { 613 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 614 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042), 615 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042), 616 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000), 617 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000), 618 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 619 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) 620 }; 621 622 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] = 623 { 624 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000), 625 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 626 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 627 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080), 628 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080), 629 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080), 630 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041), 631 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041), 632 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 633 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 634 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080), 635 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080), 636 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080), 637 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080), 638 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080), 639 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 640 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010), 641 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 642 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 643 }; 644 645 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] = 646 { 647 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 648 SOC15_REG_GOLDEN_VALUE(GC, 0, 
mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 649 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 650 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042), 651 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042), 652 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 653 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 654 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 655 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 656 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 657 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 658 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc), 659 }; 660 661 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = 662 { 663 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), 664 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), 665 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382) 666 }; 667 668 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] = 669 { 670 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 671 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 672 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 673 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 674 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 675 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 676 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 677 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 678 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 679 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 680 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 681 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 682 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 683 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 684 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 685 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) 686 }; 687 688 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = 689 { 690 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080), 691 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 692 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 693 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041), 694 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041), 695 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 696 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107), 697 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 698 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), 699 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 700 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 701 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 
0x00000800), 702 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 703 }; 704 705 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = 706 { 707 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 708 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000), 709 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e), 710 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca), 711 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098), 712 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3), 713 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), 714 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), 715 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000), 716 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00), 717 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000) 718 }; 719 720 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = { 721 {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)}, 722 {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)}, 723 }; 724 725 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = 726 { 727 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 728 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 729 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 730 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 731 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 732 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 733 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 734 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 735 }; 736 737 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = 738 { 739 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0, 740 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0, 741 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0, 742 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0, 743 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0, 744 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0, 745 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0, 746 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, 747 }; 748 749 static u32 gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32_t flag) 750 { 751 static void *scratch_reg0; 752 static void *scratch_reg1; 753 static void *scratch_reg2; 754 static void *scratch_reg3; 755 static void *spare_int; 756 static uint32_t grbm_cntl; 757 static uint32_t grbm_idx; 758 uint32_t i = 0; 759 uint32_t retries = 50000; 760 u32 ret = 0; 761 u32 tmp; 762 763 scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4; 764 scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4; 765 scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG2_BASE_IDX] + mmSCRATCH_REG2)*4; 766 scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG3_BASE_IDX] + mmSCRATCH_REG3)*4; 767 spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4; 768 769 grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; 770 grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; 771 772 if (offset == grbm_cntl || offset 
== grbm_idx) { 773 if (offset == grbm_cntl) 774 writel(v, scratch_reg2); 775 else if (offset == grbm_idx) 776 writel(v, scratch_reg3); 777 778 writel(v, ((void __iomem *)adev->rmmio) + (offset * 4)); 779 } else { 780 /* 781 * SCRATCH_REG0 = read/write value 782 * SCRATCH_REG1[30:28] = command 783 * SCRATCH_REG1[19:0] = address in dword 784 * SCRATCH_REG1[26:24] = Error reporting 785 */ 786 writel(v, scratch_reg0); 787 writel(offset | flag, scratch_reg1); 788 writel(1, spare_int); 789 790 for (i = 0; i < retries; i++) { 791 tmp = readl(scratch_reg1); 792 if (!(tmp & flag)) 793 break; 794 795 udelay(10); 796 } 797 798 if (i >= retries) { 799 if (amdgpu_sriov_reg_indirect_gc(adev)) { 800 if (tmp & GFX9_RLCG_VFGATE_DISABLED) 801 pr_err("The vfgate is disabled, program reg:0x%05x failed!\n", offset); 802 else if (tmp & GFX9_RLCG_WRONG_OPERATION_TYPE) 803 pr_err("Wrong operation type, program reg:0x%05x failed!\n", offset); 804 else if (tmp & GFX9_RLCG_NOT_IN_RANGE) 805 pr_err("The register is not in range, program reg:0x%05x failed!\n", offset); 806 else 807 pr_err("Unknown error type, program reg:0x%05x failed!\n", offset); 808 } else 809 pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset); 810 } 811 } 812 813 ret = readl(scratch_reg0); 814 815 return ret; 816 } 817 818 static bool gfx_v9_0_get_rlcg_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip, 819 int write, u32 *rlcg_flag) 820 { 821 822 switch (hwip) { 823 case GC_HWIP: 824 if (amdgpu_sriov_reg_indirect_gc(adev)) { 825 *rlcg_flag = write ? GFX9_RLCG_GC_WRITE : GFX9_RLCG_GC_READ; 826 827 return true; 828 /* only in new version, AMDGPU_REGS_NO_KIQ and AMDGPU_REGS_RLC enabled simultaneously */ 829 } else if ((acc_flags & AMDGPU_REGS_RLC) && !(acc_flags & AMDGPU_REGS_NO_KIQ) && write) { 830 *rlcg_flag = GFX9_RLCG_GC_WRITE_OLD; 831 return true; 832 } 833 834 break; 835 default: 836 return false; 837 } 838 839 return false; 840 } 841 842 static u32 gfx_v9_0_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip) 843 { 844 u32 rlcg_flag; 845 846 if (!amdgpu_sriov_runtime(adev) && gfx_v9_0_get_rlcg_flag(adev, acc_flags, hwip, 0, &rlcg_flag)) 847 return gfx_v9_0_rlcg_rw(adev, offset, 0, rlcg_flag); 848 849 if (acc_flags & AMDGPU_REGS_NO_KIQ) 850 return RREG32_NO_KIQ(offset); 851 else 852 return RREG32(offset); 853 } 854 855 static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset, 856 u32 value, u32 acc_flags, u32 hwip) 857 { 858 u32 rlcg_flag; 859 860 if (!amdgpu_sriov_runtime(adev) && gfx_v9_0_get_rlcg_flag(adev, acc_flags, hwip, 1, &rlcg_flag)) { 861 gfx_v9_0_rlcg_rw(adev, offset, value, rlcg_flag); 862 return; 863 } 864 865 if (acc_flags & AMDGPU_REGS_NO_KIQ) 866 WREG32_NO_KIQ(offset, value); 867 else 868 WREG32(offset, value); 869 } 870 871 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 872 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 873 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 874 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041 875 876 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); 877 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); 878 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev); 879 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); 880 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 881 struct amdgpu_cu_info *cu_info); 882 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); 883 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); 884 static u64 
gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); 885 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 886 void *ras_error_status); 887 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 888 void *inject_if); 889 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev); 890 891 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, 892 uint64_t queue_mask) 893 { 894 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 895 amdgpu_ring_write(kiq_ring, 896 PACKET3_SET_RESOURCES_VMID_MASK(0) | 897 /* vmid_mask:0* queue_type:0 (KIQ) */ 898 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); 899 amdgpu_ring_write(kiq_ring, 900 lower_32_bits(queue_mask)); /* queue mask lo */ 901 amdgpu_ring_write(kiq_ring, 902 upper_32_bits(queue_mask)); /* queue mask hi */ 903 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 904 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 905 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 906 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 907 } 908 909 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring, 910 struct amdgpu_ring *ring) 911 { 912 struct amdgpu_device *adev = kiq_ring->adev; 913 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 914 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 915 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 916 917 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 918 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 919 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 920 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 921 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 922 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 923 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 924 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | 925 /*queue_type: normal compute queue */ 926 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | 927 /* alloc format: all_on_one_pipe */ 928 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | 929 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | 930 /* num_queues: must be 1 */ 931 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); 932 amdgpu_ring_write(kiq_ring, 933 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 934 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 935 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 936 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 937 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 938 } 939 940 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, 941 struct amdgpu_ring *ring, 942 enum amdgpu_unmap_queues_action action, 943 u64 gpu_addr, u64 seq) 944 { 945 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; 946 947 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 948 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 949 PACKET3_UNMAP_QUEUES_ACTION(action) | 950 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 951 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | 952 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 953 amdgpu_ring_write(kiq_ring, 954 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 955 956 if (action == PREEMPT_QUEUES_NO_UNMAP) { 957 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); 958 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); 959 amdgpu_ring_write(kiq_ring, seq); 960 } else { 961 amdgpu_ring_write(kiq_ring, 0); 962 amdgpu_ring_write(kiq_ring, 0); 963 amdgpu_ring_write(kiq_ring, 0); 964 } 965 } 966 967 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, 968 struct amdgpu_ring *ring, 969 u64 addr, 970 u64 seq) 971 { 972 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 973 974 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); 975 amdgpu_ring_write(kiq_ring, 976 PACKET3_QUERY_STATUS_CONTEXT_ID(0) | 977 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | 978 PACKET3_QUERY_STATUS_COMMAND(2)); 979 /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 980 amdgpu_ring_write(kiq_ring, 981 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | 982 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); 983 amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); 984 amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); 985 amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); 986 amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); 987 } 988 989 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, 990 uint16_t pasid, uint32_t flush_type, 991 bool all_hub) 992 { 993 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); 994 amdgpu_ring_write(kiq_ring, 995 PACKET3_INVALIDATE_TLBS_DST_SEL(1) | 996 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | 997 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 998 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); 999 } 1000 1001 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { 1002 .kiq_set_resources = gfx_v9_0_kiq_set_resources, 1003 .kiq_map_queues = gfx_v9_0_kiq_map_queues, 1004 .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, 1005 .kiq_query_status = gfx_v9_0_kiq_query_status, 1006 .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, 1007 .set_resources_size = 8, 1008 .map_queues_size = 7, 1009 .unmap_queues_size = 6, 1010 .query_status_size = 7, 1011 .invalidate_tlbs_size = 2, 1012 }; 1013 1014 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) 1015 { 1016 adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs; 1017 } 1018 1019 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) 1020 { 1021 switch (adev->ip_versions[GC_HWIP][0]) { 1022 case IP_VERSION(9, 0, 1): 1023 soc15_program_register_sequence(adev, 1024 golden_settings_gc_9_0, 1025 ARRAY_SIZE(golden_settings_gc_9_0)); 1026 soc15_program_register_sequence(adev, 1027 golden_settings_gc_9_0_vg10, 1028 ARRAY_SIZE(golden_settings_gc_9_0_vg10)); 1029 break; 1030 case IP_VERSION(9, 2, 1): 1031 soc15_program_register_sequence(adev, 1032 golden_settings_gc_9_2_1, 1033 ARRAY_SIZE(golden_settings_gc_9_2_1)); 1034 soc15_program_register_sequence(adev, 1035 golden_settings_gc_9_2_1_vg12, 1036 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12)); 1037 break; 1038 case IP_VERSION(9, 4, 0): 1039 soc15_program_register_sequence(adev, 1040 golden_settings_gc_9_0, 1041 ARRAY_SIZE(golden_settings_gc_9_0)); 1042 
soc15_program_register_sequence(adev, 1043 golden_settings_gc_9_0_vg20, 1044 ARRAY_SIZE(golden_settings_gc_9_0_vg20)); 1045 break; 1046 case IP_VERSION(9, 4, 1): 1047 soc15_program_register_sequence(adev, 1048 golden_settings_gc_9_4_1_arct, 1049 ARRAY_SIZE(golden_settings_gc_9_4_1_arct)); 1050 break; 1051 case IP_VERSION(9, 2, 2): 1052 case IP_VERSION(9, 1, 0): 1053 soc15_program_register_sequence(adev, golden_settings_gc_9_1, 1054 ARRAY_SIZE(golden_settings_gc_9_1)); 1055 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 1056 soc15_program_register_sequence(adev, 1057 golden_settings_gc_9_1_rv2, 1058 ARRAY_SIZE(golden_settings_gc_9_1_rv2)); 1059 else 1060 soc15_program_register_sequence(adev, 1061 golden_settings_gc_9_1_rv1, 1062 ARRAY_SIZE(golden_settings_gc_9_1_rv1)); 1063 break; 1064 case IP_VERSION(9, 3, 0): 1065 soc15_program_register_sequence(adev, 1066 golden_settings_gc_9_1_rn, 1067 ARRAY_SIZE(golden_settings_gc_9_1_rn)); 1068 return; /* for renoir, don't need common goldensetting */ 1069 case IP_VERSION(9, 4, 2): 1070 gfx_v9_4_2_init_golden_registers(adev, 1071 adev->smuio.funcs->get_die_id(adev)); 1072 break; 1073 default: 1074 break; 1075 } 1076 1077 if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) && 1078 (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))) 1079 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, 1080 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); 1081 } 1082 1083 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) 1084 { 1085 adev->gfx.scratch.num_reg = 8; 1086 adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); 1087 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; 1088 } 1089 1090 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, 1091 bool wc, uint32_t reg, uint32_t val) 1092 { 1093 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 1094 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | 1095 WRITE_DATA_DST_SEL(0) | 1096 (wc ? 
WR_CONFIRM : 0)); 1097 amdgpu_ring_write(ring, reg); 1098 amdgpu_ring_write(ring, 0); 1099 amdgpu_ring_write(ring, val); 1100 } 1101 1102 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, 1103 int mem_space, int opt, uint32_t addr0, 1104 uint32_t addr1, uint32_t ref, uint32_t mask, 1105 uint32_t inv) 1106 { 1107 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 1108 amdgpu_ring_write(ring, 1109 /* memory (1) or register (0) */ 1110 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 1111 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 1112 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 1113 WAIT_REG_MEM_ENGINE(eng_sel))); 1114 1115 if (mem_space) 1116 BUG_ON(addr0 & 0x3); /* Dword align */ 1117 amdgpu_ring_write(ring, addr0); 1118 amdgpu_ring_write(ring, addr1); 1119 amdgpu_ring_write(ring, ref); 1120 amdgpu_ring_write(ring, mask); 1121 amdgpu_ring_write(ring, inv); /* poll interval */ 1122 } 1123 1124 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) 1125 { 1126 struct amdgpu_device *adev = ring->adev; 1127 uint32_t scratch; 1128 uint32_t tmp = 0; 1129 unsigned i; 1130 int r; 1131 1132 r = amdgpu_gfx_scratch_get(adev, &scratch); 1133 if (r) 1134 return r; 1135 1136 WREG32(scratch, 0xCAFEDEAD); 1137 r = amdgpu_ring_alloc(ring, 3); 1138 if (r) 1139 goto error_free_scratch; 1140 1141 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 1142 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 1143 amdgpu_ring_write(ring, 0xDEADBEEF); 1144 amdgpu_ring_commit(ring); 1145 1146 for (i = 0; i < adev->usec_timeout; i++) { 1147 tmp = RREG32(scratch); 1148 if (tmp == 0xDEADBEEF) 1149 break; 1150 udelay(1); 1151 } 1152 1153 if (i >= adev->usec_timeout) 1154 r = -ETIMEDOUT; 1155 1156 error_free_scratch: 1157 amdgpu_gfx_scratch_free(adev, scratch); 1158 return r; 1159 } 1160 1161 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 1162 { 1163 struct amdgpu_device *adev = ring->adev; 1164 struct amdgpu_ib ib; 1165 struct dma_fence *f = NULL; 1166 1167 unsigned index; 1168 uint64_t gpu_addr; 1169 uint32_t tmp; 1170 long r; 1171 1172 r = amdgpu_device_wb_get(adev, &index); 1173 if (r) 1174 return r; 1175 1176 gpu_addr = adev->wb.gpu_addr + (index * 4); 1177 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 1178 memset(&ib, 0, sizeof(ib)); 1179 r = amdgpu_ib_get(adev, NULL, 16, 1180 AMDGPU_IB_POOL_DIRECT, &ib); 1181 if (r) 1182 goto err1; 1183 1184 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 1185 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 1186 ib.ptr[2] = lower_32_bits(gpu_addr); 1187 ib.ptr[3] = upper_32_bits(gpu_addr); 1188 ib.ptr[4] = 0xDEADBEEF; 1189 ib.length_dw = 5; 1190 1191 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 1192 if (r) 1193 goto err2; 1194 1195 r = dma_fence_wait_timeout(f, false, timeout); 1196 if (r == 0) { 1197 r = -ETIMEDOUT; 1198 goto err2; 1199 } else if (r < 0) { 1200 goto err2; 1201 } 1202 1203 tmp = adev->wb.wb[index]; 1204 if (tmp == 0xDEADBEEF) 1205 r = 0; 1206 else 1207 r = -EINVAL; 1208 1209 err2: 1210 amdgpu_ib_free(adev, &ib, NULL); 1211 dma_fence_put(f); 1212 err1: 1213 amdgpu_device_wb_free(adev, index); 1214 return r; 1215 } 1216 1217 1218 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) 1219 { 1220 release_firmware(adev->gfx.pfp_fw); 1221 adev->gfx.pfp_fw = NULL; 1222 release_firmware(adev->gfx.me_fw); 1223 adev->gfx.me_fw = NULL; 1224 release_firmware(adev->gfx.ce_fw); 1225 adev->gfx.ce_fw = NULL; 1226 release_firmware(adev->gfx.rlc_fw); 1227 adev->gfx.rlc_fw = NULL; 1228 
release_firmware(adev->gfx.mec_fw); 1229 adev->gfx.mec_fw = NULL; 1230 release_firmware(adev->gfx.mec2_fw); 1231 adev->gfx.mec2_fw = NULL; 1232 1233 kfree(adev->gfx.rlc.register_list_format); 1234 } 1235 1236 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) 1237 { 1238 const struct rlc_firmware_header_v2_1 *rlc_hdr; 1239 1240 rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; 1241 adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); 1242 adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); 1243 adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); 1244 adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); 1245 adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); 1246 adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); 1247 adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); 1248 adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); 1249 adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); 1250 adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); 1251 adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); 1252 adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); 1253 adev->gfx.rlc.reg_list_format_direct_reg_list_length = 1254 le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); 1255 } 1256 1257 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) 1258 { 1259 adev->gfx.me_fw_write_wait = false; 1260 adev->gfx.mec_fw_write_wait = false; 1261 1262 if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) && 1263 ((adev->gfx.mec_fw_version < 0x000001a5) || 1264 (adev->gfx.mec_feature_version < 46) || 1265 (adev->gfx.pfp_fw_version < 0x000000b7) || 1266 (adev->gfx.pfp_feature_version < 46))) 1267 DRM_WARN_ONCE("CP firmware version too old, please update!"); 1268 1269 switch (adev->ip_versions[GC_HWIP][0]) { 1270 case IP_VERSION(9, 0, 1): 1271 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1272 (adev->gfx.me_feature_version >= 42) && 1273 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1274 (adev->gfx.pfp_feature_version >= 42)) 1275 adev->gfx.me_fw_write_wait = true; 1276 1277 if ((adev->gfx.mec_fw_version >= 0x00000193) && 1278 (adev->gfx.mec_feature_version >= 42)) 1279 adev->gfx.mec_fw_write_wait = true; 1280 break; 1281 case IP_VERSION(9, 2, 1): 1282 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1283 (adev->gfx.me_feature_version >= 44) && 1284 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1285 (adev->gfx.pfp_feature_version >= 44)) 1286 adev->gfx.me_fw_write_wait = true; 1287 1288 if ((adev->gfx.mec_fw_version >= 0x00000196) && 1289 (adev->gfx.mec_feature_version >= 44)) 1290 adev->gfx.mec_fw_write_wait = true; 1291 break; 1292 case IP_VERSION(9, 4, 0): 1293 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1294 (adev->gfx.me_feature_version >= 44) && 1295 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1296 (adev->gfx.pfp_feature_version >= 44)) 1297 adev->gfx.me_fw_write_wait = true; 1298 1299 if ((adev->gfx.mec_fw_version >= 0x00000197) && 1300 (adev->gfx.mec_feature_version >= 
44)) 1301 adev->gfx.mec_fw_write_wait = true; 1302 break; 1303 case IP_VERSION(9, 1, 0): 1304 case IP_VERSION(9, 2, 2): 1305 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1306 (adev->gfx.me_feature_version >= 42) && 1307 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1308 (adev->gfx.pfp_feature_version >= 42)) 1309 adev->gfx.me_fw_write_wait = true; 1310 1311 if ((adev->gfx.mec_fw_version >= 0x00000192) && 1312 (adev->gfx.mec_feature_version >= 42)) 1313 adev->gfx.mec_fw_write_wait = true; 1314 break; 1315 default: 1316 adev->gfx.me_fw_write_wait = true; 1317 adev->gfx.mec_fw_write_wait = true; 1318 break; 1319 } 1320 } 1321 1322 struct amdgpu_gfxoff_quirk { 1323 u16 chip_vendor; 1324 u16 chip_device; 1325 u16 subsys_vendor; 1326 u16 subsys_device; 1327 u8 revision; 1328 }; 1329 1330 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { 1331 /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */ 1332 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, 1333 /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */ 1334 { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, 1335 /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ 1336 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, 1337 { 0, 0, 0, 0, 0 }, 1338 }; 1339 1340 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev) 1341 { 1342 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list; 1343 1344 while (p && p->chip_device != 0) { 1345 if (pdev->vendor == p->chip_vendor && 1346 pdev->device == p->chip_device && 1347 pdev->subsystem_vendor == p->subsys_vendor && 1348 pdev->subsystem_device == p->subsys_device && 1349 pdev->revision == p->revision) { 1350 return true; 1351 } 1352 ++p; 1353 } 1354 return false; 1355 } 1356 1357 static bool is_raven_kicker(struct amdgpu_device *adev) 1358 { 1359 if (adev->pm.fw_version >= 0x41e2b) 1360 return true; 1361 else 1362 return false; 1363 } 1364 1365 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev) 1366 { 1367 if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) && 1368 (adev->gfx.me_fw_version >= 0x000000a5) && 1369 (adev->gfx.me_feature_version >= 52)) 1370 return true; 1371 else 1372 return false; 1373 } 1374 1375 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) 1376 { 1377 if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) 1378 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1379 1380 switch (adev->ip_versions[GC_HWIP][0]) { 1381 case IP_VERSION(9, 0, 1): 1382 case IP_VERSION(9, 2, 1): 1383 case IP_VERSION(9, 4, 0): 1384 break; 1385 case IP_VERSION(9, 2, 2): 1386 case IP_VERSION(9, 1, 0): 1387 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) || 1388 (adev->apu_flags & AMD_APU_IS_PICASSO)) && 1389 ((!is_raven_kicker(adev) && 1390 adev->gfx.rlc_fw_version < 531) || 1391 (adev->gfx.rlc_feature_version < 1) || 1392 !adev->gfx.rlc.is_rlc_v2_1)) 1393 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1394 1395 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1396 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1397 AMD_PG_SUPPORT_CP | 1398 AMD_PG_SUPPORT_RLC_SMU_HS; 1399 break; 1400 case IP_VERSION(9, 3, 0): 1401 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1402 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1403 AMD_PG_SUPPORT_CP | 1404 AMD_PG_SUPPORT_RLC_SMU_HS; 1405 break; 1406 default: 1407 break; 1408 } 1409 } 1410 1411 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, 1412 const char *chip_name) 1413 { 1414 char fw_name[30]; 1415 int err; 1416 struct amdgpu_firmware_info *info = NULL; 1417 const struct common_firmware_header *header = NULL; 1418 const 
struct gfx_firmware_header_v1_0 *cp_hdr; 1419 1420 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 1421 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1422 if (err) 1423 goto out; 1424 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 1425 if (err) 1426 goto out; 1427 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1428 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1429 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1430 1431 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 1432 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1433 if (err) 1434 goto out; 1435 err = amdgpu_ucode_validate(adev->gfx.me_fw); 1436 if (err) 1437 goto out; 1438 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1439 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1440 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1441 1442 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 1443 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1444 if (err) 1445 goto out; 1446 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 1447 if (err) 1448 goto out; 1449 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1450 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1451 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1452 1453 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1454 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 1455 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 1456 info->fw = adev->gfx.pfp_fw; 1457 header = (const struct common_firmware_header *)info->fw->data; 1458 adev->firmware.fw_size += 1459 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1460 1461 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1462 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1463 info->fw = adev->gfx.me_fw; 1464 header = (const struct common_firmware_header *)info->fw->data; 1465 adev->firmware.fw_size += 1466 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1467 1468 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1469 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1470 info->fw = adev->gfx.ce_fw; 1471 header = (const struct common_firmware_header *)info->fw->data; 1472 adev->firmware.fw_size += 1473 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1474 } 1475 1476 out: 1477 if (err) { 1478 dev_err(adev->dev, 1479 "gfx9: Failed to load firmware \"%s\"\n", 1480 fw_name); 1481 release_firmware(adev->gfx.pfp_fw); 1482 adev->gfx.pfp_fw = NULL; 1483 release_firmware(adev->gfx.me_fw); 1484 adev->gfx.me_fw = NULL; 1485 release_firmware(adev->gfx.ce_fw); 1486 adev->gfx.ce_fw = NULL; 1487 } 1488 return err; 1489 } 1490 1491 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, 1492 const char *chip_name) 1493 { 1494 char fw_name[30]; 1495 int err; 1496 struct amdgpu_firmware_info *info = NULL; 1497 const struct common_firmware_header *header = NULL; 1498 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1499 unsigned int *tmp = NULL; 1500 unsigned int i = 0; 1501 uint16_t version_major; 1502 uint16_t version_minor; 1503 uint32_t smu_version; 1504 1505 /* 1506 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin 1507 * instead of picasso_rlc.bin. 
1508 * Judgment method: 1509 * PCO AM4: revision >= 0xC8 && revision <= 0xCF 1510 * or revision >= 0xD8 && revision <= 0xDF 1511 * otherwise is PCO FP5 1512 */ 1513 if (!strcmp(chip_name, "picasso") && 1514 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || 1515 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) 1516 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); 1517 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && 1518 (smu_version >= 0x41e2b)) 1519 /** 1520 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 1521 */ 1522 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); 1523 else 1524 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 1525 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 1526 if (err) 1527 goto out; 1528 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 1529 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1530 1531 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1532 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1533 if (version_major == 2 && version_minor == 1) 1534 adev->gfx.rlc.is_rlc_v2_1 = true; 1535 1536 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 1537 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 1538 adev->gfx.rlc.save_and_restore_offset = 1539 le32_to_cpu(rlc_hdr->save_and_restore_offset); 1540 adev->gfx.rlc.clear_state_descriptor_offset = 1541 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 1542 adev->gfx.rlc.avail_scratch_ram_locations = 1543 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 1544 adev->gfx.rlc.reg_restore_list_size = 1545 le32_to_cpu(rlc_hdr->reg_restore_list_size); 1546 adev->gfx.rlc.reg_list_format_start = 1547 le32_to_cpu(rlc_hdr->reg_list_format_start); 1548 adev->gfx.rlc.reg_list_format_separate_start = 1549 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 1550 adev->gfx.rlc.starting_offsets_start = 1551 le32_to_cpu(rlc_hdr->starting_offsets_start); 1552 adev->gfx.rlc.reg_list_format_size_bytes = 1553 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 1554 adev->gfx.rlc.reg_list_size_bytes = 1555 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 1556 adev->gfx.rlc.register_list_format = 1557 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 1558 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 1559 if (!adev->gfx.rlc.register_list_format) { 1560 err = -ENOMEM; 1561 goto out; 1562 } 1563 1564 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1565 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 1566 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) 1567 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 1568 1569 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 1570 1571 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1572 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 1573 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) 1574 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 1575 1576 if (adev->gfx.rlc.is_rlc_v2_1) 1577 gfx_v9_0_init_rlc_ext_microcode(adev); 1578 1579 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1580 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1581 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1582 info->fw = adev->gfx.rlc_fw; 1583 header = (const struct common_firmware_header *)info->fw->data; 1584 
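/* Note: fw_size accumulates the PAGE_SIZE-aligned footprint of each ucode blob handed to the PSP loader. */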
adev->firmware.fw_size += 1585 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1586 1587 if (adev->gfx.rlc.is_rlc_v2_1 && 1588 adev->gfx.rlc.save_restore_list_cntl_size_bytes && 1589 adev->gfx.rlc.save_restore_list_gpm_size_bytes && 1590 adev->gfx.rlc.save_restore_list_srm_size_bytes) { 1591 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; 1592 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; 1593 info->fw = adev->gfx.rlc_fw; 1594 adev->firmware.fw_size += 1595 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); 1596 1597 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; 1598 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; 1599 info->fw = adev->gfx.rlc_fw; 1600 adev->firmware.fw_size += 1601 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); 1602 1603 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; 1604 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; 1605 info->fw = adev->gfx.rlc_fw; 1606 adev->firmware.fw_size += 1607 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); 1608 } 1609 } 1610 1611 out: 1612 if (err) { 1613 dev_err(adev->dev, 1614 "gfx9: Failed to load firmware \"%s\"\n", 1615 fw_name); 1616 release_firmware(adev->gfx.rlc_fw); 1617 adev->gfx.rlc_fw = NULL; 1618 } 1619 return err; 1620 } 1621 1622 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) 1623 { 1624 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) || 1625 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || 1626 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) 1627 return false; 1628 1629 return true; 1630 } 1631 1632 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, 1633 const char *chip_name) 1634 { 1635 char fw_name[30]; 1636 int err; 1637 struct amdgpu_firmware_info *info = NULL; 1638 const struct common_firmware_header *header = NULL; 1639 const struct gfx_firmware_header_v1_0 *cp_hdr; 1640 1641 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1642 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1643 if (err) 1644 goto out; 1645 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 1646 if (err) 1647 goto out; 1648 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1649 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1650 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1651 1652 1653 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1654 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1655 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1656 if (!err) { 1657 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1658 if (err) 1659 goto out; 1660 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1661 adev->gfx.mec2_fw->data; 1662 adev->gfx.mec2_fw_version = 1663 le32_to_cpu(cp_hdr->header.ucode_version); 1664 adev->gfx.mec2_feature_version = 1665 le32_to_cpu(cp_hdr->ucode_feature_version); 1666 } else { 1667 err = 0; 1668 adev->gfx.mec2_fw = NULL; 1669 } 1670 } else { 1671 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version; 1672 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version; 1673 } 1674 1675 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1676 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1677 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1678 info->fw = adev->gfx.mec_fw; 1679 header = (const struct common_firmware_header *)info->fw->data; 1680 cp_hdr = 
(const struct gfx_firmware_header_v1_0 *)info->fw->data; 1681 adev->firmware.fw_size += 1682 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1683 1684 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; 1685 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; 1686 info->fw = adev->gfx.mec_fw; 1687 adev->firmware.fw_size += 1688 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1689 1690 if (adev->gfx.mec2_fw) { 1691 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1692 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1693 info->fw = adev->gfx.mec2_fw; 1694 header = (const struct common_firmware_header *)info->fw->data; 1695 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1696 adev->firmware.fw_size += 1697 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1698 1699 /* TODO: Determine if MEC2 JT FW loading can be removed 1700 for all GFX V9 asic and above */ 1701 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1702 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; 1703 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; 1704 info->fw = adev->gfx.mec2_fw; 1705 adev->firmware.fw_size += 1706 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, 1707 PAGE_SIZE); 1708 } 1709 } 1710 } 1711 1712 out: 1713 gfx_v9_0_check_if_need_gfxoff(adev); 1714 gfx_v9_0_check_fw_write_wait(adev); 1715 if (err) { 1716 dev_err(adev->dev, 1717 "gfx9: Failed to load firmware \"%s\"\n", 1718 fw_name); 1719 release_firmware(adev->gfx.mec_fw); 1720 adev->gfx.mec_fw = NULL; 1721 release_firmware(adev->gfx.mec2_fw); 1722 adev->gfx.mec2_fw = NULL; 1723 } 1724 return err; 1725 } 1726 1727 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1728 { 1729 const char *chip_name; 1730 int r; 1731 1732 DRM_DEBUG("\n"); 1733 1734 switch (adev->ip_versions[GC_HWIP][0]) { 1735 case IP_VERSION(9, 0, 1): 1736 chip_name = "vega10"; 1737 break; 1738 case IP_VERSION(9, 2, 1): 1739 chip_name = "vega12"; 1740 break; 1741 case IP_VERSION(9, 4, 0): 1742 chip_name = "vega20"; 1743 break; 1744 case IP_VERSION(9, 2, 2): 1745 case IP_VERSION(9, 1, 0): 1746 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 1747 chip_name = "raven2"; 1748 else if (adev->apu_flags & AMD_APU_IS_PICASSO) 1749 chip_name = "picasso"; 1750 else 1751 chip_name = "raven"; 1752 break; 1753 case IP_VERSION(9, 4, 1): 1754 chip_name = "arcturus"; 1755 break; 1756 case IP_VERSION(9, 3, 0): 1757 if (adev->apu_flags & AMD_APU_IS_RENOIR) 1758 chip_name = "renoir"; 1759 else 1760 chip_name = "green_sardine"; 1761 break; 1762 case IP_VERSION(9, 4, 2): 1763 chip_name = "aldebaran"; 1764 break; 1765 default: 1766 BUG(); 1767 } 1768 1769 /* No CPG in Arcturus */ 1770 if (adev->gfx.num_gfx_rings) { 1771 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); 1772 if (r) 1773 return r; 1774 } 1775 1776 r = gfx_v9_0_init_rlc_microcode(adev, chip_name); 1777 if (r) 1778 return r; 1779 1780 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); 1781 if (r) 1782 return r; 1783 1784 return r; 1785 } 1786 1787 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1788 { 1789 u32 count = 0; 1790 const struct cs_section_def *sect = NULL; 1791 const struct cs_extent_def *ext = NULL; 1792 1793 /* begin clear state */ 1794 count += 2; 1795 /* context control state */ 1796 count += 3; 1797 1798 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1799 for (ext = sect->section; ext->extent != NULL; ++ext) { 1800 if (sect->id == SECT_CONTEXT) 1801 count += 2 + ext->reg_count; 1802 else 1803 return 
0; 1804 } 1805 } 1806 1807 /* end clear state */ 1808 count += 2; 1809 /* clear state */ 1810 count += 2; 1811 1812 return count; 1813 } 1814 1815 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1816 volatile u32 *buffer) 1817 { 1818 u32 count = 0, i; 1819 const struct cs_section_def *sect = NULL; 1820 const struct cs_extent_def *ext = NULL; 1821 1822 if (adev->gfx.rlc.cs_data == NULL) 1823 return; 1824 if (buffer == NULL) 1825 return; 1826 1827 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1828 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1829 1830 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1831 buffer[count++] = cpu_to_le32(0x80000000); 1832 buffer[count++] = cpu_to_le32(0x80000000); 1833 1834 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1835 for (ext = sect->section; ext->extent != NULL; ++ext) { 1836 if (sect->id == SECT_CONTEXT) { 1837 buffer[count++] = 1838 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1839 buffer[count++] = cpu_to_le32(ext->reg_index - 1840 PACKET3_SET_CONTEXT_REG_START); 1841 for (i = 0; i < ext->reg_count; i++) 1842 buffer[count++] = cpu_to_le32(ext->extent[i]); 1843 } else { 1844 return; 1845 } 1846 } 1847 } 1848 1849 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1850 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1851 1852 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1853 buffer[count++] = cpu_to_le32(0); 1854 } 1855 1856 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1857 { 1858 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1859 uint32_t pg_always_on_cu_num = 2; 1860 uint32_t always_on_cu_num; 1861 uint32_t i, j, k; 1862 uint32_t mask, cu_bitmap, counter; 1863 1864 if (adev->flags & AMD_IS_APU) 1865 always_on_cu_num = 4; 1866 else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1)) 1867 always_on_cu_num = 8; 1868 else 1869 always_on_cu_num = 12; 1870 1871 mutex_lock(&adev->grbm_idx_mutex); 1872 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1873 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1874 mask = 1; 1875 cu_bitmap = 0; 1876 counter = 0; 1877 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1878 1879 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1880 if (cu_info->bitmap[i][j] & mask) { 1881 if (counter == pg_always_on_cu_num) 1882 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1883 if (counter < always_on_cu_num) 1884 cu_bitmap |= mask; 1885 else 1886 break; 1887 counter++; 1888 } 1889 mask <<= 1; 1890 } 1891 1892 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1893 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1894 } 1895 } 1896 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1897 mutex_unlock(&adev->grbm_idx_mutex); 1898 } 1899 1900 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1901 { 1902 uint32_t data; 1903 1904 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1905 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1906 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1907 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1908 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1909 1910 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1911 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1912 1913 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1914 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1915 1916 
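/* The CU-mask and load-balance parameter writes below go through GRBM SE/SH broadcast, so hold grbm_idx_mutex across the whole sequence. */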
mutex_lock(&adev->grbm_idx_mutex); 1917 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1918 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1919 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1920 1921 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1922 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1923 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1924 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1925 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1926 1927 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1928 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1929 data &= 0x0000FFFF; 1930 data |= 0x00C00000; 1931 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1932 1933 /* 1934 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1935 * programmed in gfx_v9_0_init_always_on_cu_mask() 1936 */ 1937 1938 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1939 * but used for RLC_LB_CNTL configuration */ 1940 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1941 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1942 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1943 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1944 mutex_unlock(&adev->grbm_idx_mutex); 1945 1946 gfx_v9_0_init_always_on_cu_mask(adev); 1947 } 1948 1949 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1950 { 1951 uint32_t data; 1952 1953 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1954 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1955 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1956 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1957 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1958 1959 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1960 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1961 1962 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1963 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1964 1965 mutex_lock(&adev->grbm_idx_mutex); 1966 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1967 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1968 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1969 1970 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1971 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1972 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1973 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1974 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1975 1976 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1977 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1978 data &= 0x0000FFFF; 1979 data |= 0x00C00000; 1980 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1981 1982 /* 1983 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1984 * programmed in gfx_v9_0_init_always_on_cu_mask() 1985 */ 1986 1987 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1988 * but used for RLC_LB_CNTL configuration */ 1989 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1990 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1991 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1992 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1993 mutex_unlock(&adev->grbm_idx_mutex); 1994 1995 gfx_v9_0_init_always_on_cu_mask(adev); 1996 } 1997 1998 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1999 { 2000 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 
1 : 0); 2001 } 2002 2003 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 2004 { 2005 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) 2006 return 5; 2007 else 2008 return 4; 2009 } 2010 2011 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 2012 { 2013 const struct cs_section_def *cs_data; 2014 int r; 2015 2016 adev->gfx.rlc.cs_data = gfx9_cs_data; 2017 2018 cs_data = adev->gfx.rlc.cs_data; 2019 2020 if (cs_data) { 2021 /* init clear state block */ 2022 r = amdgpu_gfx_rlc_init_csb(adev); 2023 if (r) 2024 return r; 2025 } 2026 2027 if (adev->flags & AMD_IS_APU) { 2028 /* TODO: double check the cp_table_size for RV */ 2029 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 2030 r = amdgpu_gfx_rlc_init_cpt(adev); 2031 if (r) 2032 return r; 2033 } 2034 2035 switch (adev->ip_versions[GC_HWIP][0]) { 2036 case IP_VERSION(9, 2, 2): 2037 case IP_VERSION(9, 1, 0): 2038 gfx_v9_0_init_lbpw(adev); 2039 break; 2040 case IP_VERSION(9, 4, 0): 2041 gfx_v9_4_init_lbpw(adev); 2042 break; 2043 default: 2044 break; 2045 } 2046 2047 /* init spm vmid with 0xf */ 2048 if (adev->gfx.rlc.funcs->update_spm_vmid) 2049 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); 2050 2051 return 0; 2052 } 2053 2054 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 2055 { 2056 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 2057 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 2058 } 2059 2060 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 2061 { 2062 int r; 2063 u32 *hpd; 2064 const __le32 *fw_data; 2065 unsigned fw_size; 2066 u32 *fw; 2067 size_t mec_hpd_size; 2068 2069 const struct gfx_firmware_header_v1_0 *mec_hdr; 2070 2071 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 2072 2073 /* take ownership of the relevant compute queues */ 2074 amdgpu_gfx_compute_queue_acquire(adev); 2075 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 2076 if (mec_hpd_size) { 2077 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 2078 AMDGPU_GEM_DOMAIN_VRAM, 2079 &adev->gfx.mec.hpd_eop_obj, 2080 &adev->gfx.mec.hpd_eop_gpu_addr, 2081 (void **)&hpd); 2082 if (r) { 2083 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 2084 gfx_v9_0_mec_fini(adev); 2085 return r; 2086 } 2087 2088 memset(hpd, 0, mec_hpd_size); 2089 2090 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 2091 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 2092 } 2093 2094 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 2095 2096 fw_data = (const __le32 *) 2097 (adev->gfx.mec_fw->data + 2098 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 2099 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 2100 2101 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 2102 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2103 &adev->gfx.mec.mec_fw_obj, 2104 &adev->gfx.mec.mec_fw_gpu_addr, 2105 (void **)&fw); 2106 if (r) { 2107 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 2108 gfx_v9_0_mec_fini(adev); 2109 return r; 2110 } 2111 2112 memcpy(fw, fw_data, fw_size); 2113 2114 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 2115 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 2116 2117 return 0; 2118 } 2119 2120 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 2121 { 2122 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 2123 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 2124 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 2125 (address << SQ_IND_INDEX__INDEX__SHIFT) | 
2126 (SQ_IND_INDEX__FORCE_READ_MASK)); 2127 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 2128 } 2129 2130 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 2131 uint32_t wave, uint32_t thread, 2132 uint32_t regno, uint32_t num, uint32_t *out) 2133 { 2134 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 2135 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 2136 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 2137 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 2138 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 2139 (SQ_IND_INDEX__FORCE_READ_MASK) | 2140 (SQ_IND_INDEX__AUTO_INCR_MASK)); 2141 while (num--) 2142 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 2143 } 2144 2145 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 2146 { 2147 /* type 1 wave data */ 2148 dst[(*no_fields)++] = 1; 2149 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 2150 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 2151 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 2152 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 2153 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 2154 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 2155 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 2156 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 2157 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 2158 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 2159 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 2160 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 2161 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 2162 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 2163 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); 2164 } 2165 2166 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 2167 uint32_t wave, uint32_t start, 2168 uint32_t size, uint32_t *dst) 2169 { 2170 wave_read_regs( 2171 adev, simd, wave, 0, 2172 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 2173 } 2174 2175 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 2176 uint32_t wave, uint32_t thread, 2177 uint32_t start, uint32_t size, 2178 uint32_t *dst) 2179 { 2180 wave_read_regs( 2181 adev, simd, wave, thread, 2182 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 2183 } 2184 2185 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 2186 u32 me, u32 pipe, u32 q, u32 vm) 2187 { 2188 soc15_grbm_select(adev, me, pipe, q, vm); 2189 } 2190 2191 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 2192 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 2193 .select_se_sh = &gfx_v9_0_select_se_sh, 2194 .read_wave_data = &gfx_v9_0_read_wave_data, 2195 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 2196 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 2197 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 2198 }; 2199 2200 static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = { 2201 .ras_late_init = amdgpu_gfx_ras_late_init, 2202 .ras_fini = amdgpu_gfx_ras_fini, 2203 .ras_error_inject = &gfx_v9_0_ras_error_inject, 2204 .query_ras_error_count = &gfx_v9_0_query_ras_error_count, 2205 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, 2206 }; 2207 2208 static int 
gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 2209 { 2210 u32 gb_addr_config; 2211 int err; 2212 2213 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 2214 2215 switch (adev->ip_versions[GC_HWIP][0]) { 2216 case IP_VERSION(9, 0, 1): 2217 adev->gfx.config.max_hw_contexts = 8; 2218 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2219 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2220 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2221 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2222 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 2223 break; 2224 case IP_VERSION(9, 2, 1): 2225 adev->gfx.config.max_hw_contexts = 8; 2226 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2227 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2228 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2229 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2230 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 2231 DRM_INFO("fix gfx.config for vega12\n"); 2232 break; 2233 case IP_VERSION(9, 4, 0): 2234 adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs; 2235 adev->gfx.config.max_hw_contexts = 8; 2236 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2237 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2238 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2239 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2240 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2241 gb_addr_config &= ~0xf3e777ff; 2242 gb_addr_config |= 0x22014042; 2243 /* check vbios table if gpu info is not available */ 2244 err = amdgpu_atomfirmware_get_gfx_info(adev); 2245 if (err) 2246 return err; 2247 break; 2248 case IP_VERSION(9, 2, 2): 2249 case IP_VERSION(9, 1, 0): 2250 adev->gfx.config.max_hw_contexts = 8; 2251 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2252 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2253 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2254 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2255 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 2256 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 2257 else 2258 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 2259 break; 2260 case IP_VERSION(9, 4, 1): 2261 adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs; 2262 adev->gfx.config.max_hw_contexts = 8; 2263 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2264 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2265 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2266 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2267 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2268 gb_addr_config &= ~0xf3e777ff; 2269 gb_addr_config |= 0x22014042; 2270 break; 2271 case IP_VERSION(9, 3, 0): 2272 adev->gfx.config.max_hw_contexts = 8; 2273 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2274 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2275 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 2276 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2277 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2278 gb_addr_config &= ~0xf3e777ff; 2279 gb_addr_config |= 0x22010042; 2280 break; 2281 case IP_VERSION(9, 4, 2): 2282 adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs; 2283 adev->gfx.config.max_hw_contexts = 8; 2284 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2285 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2286 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2287 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2288 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2289 gb_addr_config &= ~0xf3e777ff; 2290 gb_addr_config |= 0x22014042; 2291 /* check vbios table if gpu info is not available */ 
2292 err = amdgpu_atomfirmware_get_gfx_info(adev); 2293 if (err) 2294 return err; 2295 break; 2296 default: 2297 BUG(); 2298 break; 2299 } 2300 2301 adev->gfx.config.gb_addr_config = gb_addr_config; 2302 2303 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 2304 REG_GET_FIELD( 2305 adev->gfx.config.gb_addr_config, 2306 GB_ADDR_CONFIG, 2307 NUM_PIPES); 2308 2309 adev->gfx.config.max_tile_pipes = 2310 adev->gfx.config.gb_addr_config_fields.num_pipes; 2311 2312 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 2313 REG_GET_FIELD( 2314 adev->gfx.config.gb_addr_config, 2315 GB_ADDR_CONFIG, 2316 NUM_BANKS); 2317 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 2318 REG_GET_FIELD( 2319 adev->gfx.config.gb_addr_config, 2320 GB_ADDR_CONFIG, 2321 MAX_COMPRESSED_FRAGS); 2322 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 2323 REG_GET_FIELD( 2324 adev->gfx.config.gb_addr_config, 2325 GB_ADDR_CONFIG, 2326 NUM_RB_PER_SE); 2327 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 2328 REG_GET_FIELD( 2329 adev->gfx.config.gb_addr_config, 2330 GB_ADDR_CONFIG, 2331 NUM_SHADER_ENGINES); 2332 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 2333 REG_GET_FIELD( 2334 adev->gfx.config.gb_addr_config, 2335 GB_ADDR_CONFIG, 2336 PIPE_INTERLEAVE_SIZE)); 2337 2338 return 0; 2339 } 2340 2341 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2342 int mec, int pipe, int queue) 2343 { 2344 unsigned irq_type; 2345 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2346 unsigned int hw_prio; 2347 2348 ring = &adev->gfx.compute_ring[ring_id]; 2349 2350 /* mec0 is me1 */ 2351 ring->me = mec + 1; 2352 ring->pipe = pipe; 2353 ring->queue = queue; 2354 2355 ring->ring_obj = NULL; 2356 ring->use_doorbell = true; 2357 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2358 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2359 + (ring_id * GFX9_MEC_HPD_SIZE); 2360 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2361 2362 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2363 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2364 + ring->pipe; 2365 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
2366 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 2367 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2368 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 2369 hw_prio, NULL); 2370 } 2371 2372 static int gfx_v9_0_sw_init(void *handle) 2373 { 2374 int i, j, k, r, ring_id; 2375 struct amdgpu_ring *ring; 2376 struct amdgpu_kiq *kiq; 2377 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2378 2379 switch (adev->ip_versions[GC_HWIP][0]) { 2380 case IP_VERSION(9, 0, 1): 2381 case IP_VERSION(9, 2, 1): 2382 case IP_VERSION(9, 4, 0): 2383 case IP_VERSION(9, 2, 2): 2384 case IP_VERSION(9, 1, 0): 2385 case IP_VERSION(9, 4, 1): 2386 case IP_VERSION(9, 3, 0): 2387 case IP_VERSION(9, 4, 2): 2388 adev->gfx.mec.num_mec = 2; 2389 break; 2390 default: 2391 adev->gfx.mec.num_mec = 1; 2392 break; 2393 } 2394 2395 adev->gfx.mec.num_pipe_per_mec = 4; 2396 adev->gfx.mec.num_queue_per_pipe = 8; 2397 2398 /* EOP Event */ 2399 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2400 if (r) 2401 return r; 2402 2403 /* Privileged reg */ 2404 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2405 &adev->gfx.priv_reg_irq); 2406 if (r) 2407 return r; 2408 2409 /* Privileged inst */ 2410 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2411 &adev->gfx.priv_inst_irq); 2412 if (r) 2413 return r; 2414 2415 /* ECC error */ 2416 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2417 &adev->gfx.cp_ecc_error_irq); 2418 if (r) 2419 return r; 2420 2421 /* FUE error */ 2422 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2423 &adev->gfx.cp_ecc_error_irq); 2424 if (r) 2425 return r; 2426 2427 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2428 2429 gfx_v9_0_scratch_init(adev); 2430 2431 r = gfx_v9_0_init_microcode(adev); 2432 if (r) { 2433 DRM_ERROR("Failed to load gfx firmware!\n"); 2434 return r; 2435 } 2436 2437 r = adev->gfx.rlc.funcs->init(adev); 2438 if (r) { 2439 DRM_ERROR("Failed to init rlc BOs!\n"); 2440 return r; 2441 } 2442 2443 r = gfx_v9_0_mec_init(adev); 2444 if (r) { 2445 DRM_ERROR("Failed to init MEC BOs!\n"); 2446 return r; 2447 } 2448 2449 /* set up the gfx ring */ 2450 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2451 ring = &adev->gfx.gfx_ring[i]; 2452 ring->ring_obj = NULL; 2453 if (!i) 2454 sprintf(ring->name, "gfx"); 2455 else 2456 sprintf(ring->name, "gfx_%d", i); 2457 ring->use_doorbell = true; 2458 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2459 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2460 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, 2461 AMDGPU_RING_PRIO_DEFAULT, NULL); 2462 if (r) 2463 return r; 2464 } 2465 2466 /* set up the compute queues - allocate horizontally across pipes */ 2467 ring_id = 0; 2468 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2469 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2470 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2471 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2472 continue; 2473 2474 r = gfx_v9_0_compute_ring_init(adev, 2475 ring_id, 2476 i, k, j); 2477 if (r) 2478 return r; 2479 2480 ring_id++; 2481 } 2482 } 2483 } 2484 2485 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 2486 if (r) { 2487 DRM_ERROR("Failed to init KIQ BOs!\n"); 2488 return r; 2489 } 2490 2491 kiq = &adev->gfx.kiq; 2492 r = amdgpu_gfx_kiq_init_ring(adev, 
&kiq->ring, &kiq->irq); 2493 if (r) 2494 return r; 2495 2496 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 2497 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 2498 if (r) 2499 return r; 2500 2501 adev->gfx.ce_ram_size = 0x8000; 2502 2503 r = gfx_v9_0_gpu_early_init(adev); 2504 if (r) 2505 return r; 2506 2507 return 0; 2508 } 2509 2510 2511 static int gfx_v9_0_sw_fini(void *handle) 2512 { 2513 int i; 2514 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2515 2516 if (adev->gfx.ras_funcs && 2517 adev->gfx.ras_funcs->ras_fini) 2518 adev->gfx.ras_funcs->ras_fini(adev); 2519 2520 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2521 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2522 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2523 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2524 2525 amdgpu_gfx_mqd_sw_fini(adev); 2526 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); 2527 amdgpu_gfx_kiq_fini(adev); 2528 2529 gfx_v9_0_mec_fini(adev); 2530 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 2531 &adev->gfx.rlc.clear_state_gpu_addr, 2532 (void **)&adev->gfx.rlc.cs_ptr); 2533 if (adev->flags & AMD_IS_APU) { 2534 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2535 &adev->gfx.rlc.cp_table_gpu_addr, 2536 (void **)&adev->gfx.rlc.cp_table_ptr); 2537 } 2538 gfx_v9_0_free_microcode(adev); 2539 2540 return 0; 2541 } 2542 2543 2544 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2545 { 2546 /* TODO */ 2547 } 2548 2549 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, 2550 u32 instance) 2551 { 2552 u32 data; 2553 2554 if (instance == 0xffffffff) 2555 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2556 else 2557 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2558 2559 if (se_num == 0xffffffff) 2560 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2561 else 2562 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2563 2564 if (sh_num == 0xffffffff) 2565 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2566 else 2567 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2568 2569 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2570 } 2571 2572 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2573 { 2574 u32 data, mask; 2575 2576 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2577 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2578 2579 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2580 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2581 2582 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2583 adev->gfx.config.max_sh_per_se); 2584 2585 return (~data) & mask; 2586 } 2587 2588 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2589 { 2590 int i, j; 2591 u32 data; 2592 u32 active_rbs = 0; 2593 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2594 adev->gfx.config.max_sh_per_se; 2595 2596 mutex_lock(&adev->grbm_idx_mutex); 2597 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2598 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2599 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2600 data = gfx_v9_0_get_rb_active_bitmap(adev); 2601 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 2602 rb_bitmap_width_per_sh); 2603 } 2604 } 2605 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2606 mutex_unlock(&adev->grbm_idx_mutex); 2607 2608 
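/* Cache the aggregated RB bitmap and count so later configuration queries do not have to re-read the per-SE backend-disable registers. */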
adev->gfx.config.backend_enable_mask = active_rbs; 2609 adev->gfx.config.num_rbs = hweight32(active_rbs); 2610 } 2611 2612 #define DEFAULT_SH_MEM_BASES (0x6000) 2613 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 2614 { 2615 int i; 2616 uint32_t sh_mem_config; 2617 uint32_t sh_mem_bases; 2618 2619 /* 2620 * Configure apertures: 2621 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2622 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2623 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2624 */ 2625 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 2626 2627 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 2628 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 2629 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 2630 2631 mutex_lock(&adev->srbm_mutex); 2632 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2633 soc15_grbm_select(adev, 0, 0, 0, i); 2634 /* CP and shaders */ 2635 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 2636 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 2637 } 2638 soc15_grbm_select(adev, 0, 0, 0, 0); 2639 mutex_unlock(&adev->srbm_mutex); 2640 2641 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 2642 acccess. These should be enabled by FW for target VMIDs. */ 2643 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2644 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 2645 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 2646 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 2647 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 2648 } 2649 } 2650 2651 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) 2652 { 2653 int vmid; 2654 2655 /* 2656 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2657 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2658 * the driver can enable them for graphics. VMID0 should maintain 2659 * access so that HWS firmware can save/restore entries. 
2660 */ 2661 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { 2662 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2663 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2664 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2665 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2666 } 2667 } 2668 2669 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) 2670 { 2671 uint32_t tmp; 2672 2673 switch (adev->ip_versions[GC_HWIP][0]) { 2674 case IP_VERSION(9, 4, 1): 2675 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); 2676 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, 2677 DISABLE_BARRIER_WAITCNT, 1); 2678 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); 2679 break; 2680 default: 2681 break; 2682 } 2683 } 2684 2685 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2686 { 2687 u32 tmp; 2688 int i; 2689 2690 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2691 2692 gfx_v9_0_tiling_mode_table_init(adev); 2693 2694 gfx_v9_0_setup_rb(adev); 2695 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2696 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2697 2698 /* XXX SH_MEM regs */ 2699 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2700 mutex_lock(&adev->srbm_mutex); 2701 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 2702 soc15_grbm_select(adev, 0, 0, 0, i); 2703 /* CP and shaders */ 2704 if (i == 0) { 2705 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2706 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2707 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2708 !!adev->gmc.noretry); 2709 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2710 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2711 } else { 2712 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2713 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2714 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2715 !!adev->gmc.noretry); 2716 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2717 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2718 (adev->gmc.private_aperture_start >> 48)); 2719 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2720 (adev->gmc.shared_aperture_start >> 48)); 2721 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2722 } 2723 } 2724 soc15_grbm_select(adev, 0, 0, 0, 0); 2725 2726 mutex_unlock(&adev->srbm_mutex); 2727 2728 gfx_v9_0_init_compute_vmid(adev); 2729 gfx_v9_0_init_gds_vmid(adev); 2730 gfx_v9_0_init_sq_config(adev); 2731 } 2732 2733 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2734 { 2735 u32 i, j, k; 2736 u32 mask; 2737 2738 mutex_lock(&adev->grbm_idx_mutex); 2739 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2740 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2741 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2742 for (k = 0; k < adev->usec_timeout; k++) { 2743 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2744 break; 2745 udelay(1); 2746 } 2747 if (k == adev->usec_timeout) { 2748 gfx_v9_0_select_se_sh(adev, 0xffffffff, 2749 0xffffffff, 0xffffffff); 2750 mutex_unlock(&adev->grbm_idx_mutex); 2751 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2752 i, j); 2753 return; 2754 } 2755 } 2756 } 2757 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2758 mutex_unlock(&adev->grbm_idx_mutex); 2759 2760 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2761 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2762 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2763 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2764 for (k = 0; k < adev->usec_timeout; k++) { 2765 
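/* Poll until the non-CU serdes masters (SE/GC/TC0/TC1) all report idle or the usual usec timeout expires. */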
if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2766 break; 2767 udelay(1); 2768 } 2769 } 2770 2771 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2772 bool enable) 2773 { 2774 u32 tmp; 2775 2776 /* These interrupts should be enabled to drive DS clock */ 2777 2778 tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2779 2780 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2781 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2782 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2783 if(adev->gfx.num_gfx_rings) 2784 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2785 2786 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2787 } 2788 2789 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2790 { 2791 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2792 /* csib */ 2793 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2794 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2795 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2796 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2797 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2798 adev->gfx.rlc.clear_state_size); 2799 } 2800 2801 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2802 int indirect_offset, 2803 int list_size, 2804 int *unique_indirect_regs, 2805 int unique_indirect_reg_count, 2806 int *indirect_start_offsets, 2807 int *indirect_start_offsets_count, 2808 int max_start_offsets_count) 2809 { 2810 int idx; 2811 2812 for (; indirect_offset < list_size; indirect_offset++) { 2813 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2814 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2815 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2816 2817 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2818 indirect_offset += 2; 2819 2820 /* look for the matching indice */ 2821 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2822 if (unique_indirect_regs[idx] == 2823 register_list_format[indirect_offset] || 2824 !unique_indirect_regs[idx]) 2825 break; 2826 } 2827 2828 BUG_ON(idx >= unique_indirect_reg_count); 2829 2830 if (!unique_indirect_regs[idx]) 2831 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2832 2833 indirect_offset++; 2834 } 2835 } 2836 } 2837 2838 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2839 { 2840 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2841 int unique_indirect_reg_count = 0; 2842 2843 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2844 int indirect_start_offsets_count = 0; 2845 2846 int list_size = 0; 2847 int i = 0, j = 0; 2848 u32 tmp = 0; 2849 2850 u32 *register_list_format = 2851 kmemdup(adev->gfx.rlc.register_list_format, 2852 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2853 if (!register_list_format) 2854 return -ENOMEM; 2855 2856 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2857 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2858 gfx_v9_1_parse_ind_reg_list(register_list_format, 2859 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2860 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2861 unique_indirect_regs, 2862 unique_indirect_reg_count, 2863 indirect_start_offsets, 2864 &indirect_start_offsets_count, 2865 
ARRAY_SIZE(indirect_start_offsets)); 2866 2867 /* enable auto inc in case it is disabled */ 2868 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2869 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2870 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2871 2872 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2873 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2874 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2875 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2876 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2877 adev->gfx.rlc.register_restore[i]); 2878 2879 /* load indirect register */ 2880 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2881 adev->gfx.rlc.reg_list_format_start); 2882 2883 /* direct register portion */ 2884 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2885 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2886 register_list_format[i]); 2887 2888 /* indirect register portion */ 2889 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2890 if (register_list_format[i] == 0xFFFFFFFF) { 2891 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2892 continue; 2893 } 2894 2895 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2896 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2897 2898 for (j = 0; j < unique_indirect_reg_count; j++) { 2899 if (register_list_format[i] == unique_indirect_regs[j]) { 2900 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2901 break; 2902 } 2903 } 2904 2905 BUG_ON(j >= unique_indirect_reg_count); 2906 2907 i++; 2908 } 2909 2910 /* set save/restore list size */ 2911 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2912 list_size = list_size >> 1; 2913 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2914 adev->gfx.rlc.reg_restore_list_size); 2915 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2916 2917 /* write the starting offsets to RLC scratch ram */ 2918 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2919 adev->gfx.rlc.starting_offsets_start); 2920 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2921 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2922 indirect_start_offsets[i]); 2923 2924 /* load unique indirect regs*/ 2925 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2926 if (unique_indirect_regs[i] != 0) { 2927 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2928 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2929 unique_indirect_regs[i] & 0x3FFFF); 2930 2931 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2932 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2933 unique_indirect_regs[i] >> 20); 2934 } 2935 } 2936 2937 kfree(register_list_format); 2938 return 0; 2939 } 2940 2941 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2942 { 2943 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2944 } 2945 2946 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2947 bool enable) 2948 { 2949 uint32_t data = 0; 2950 uint32_t default_data = 0; 2951 2952 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2953 if (enable) { 2954 /* enable GFXIP control over CGPG */ 2955 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2956 if(default_data != data) 2957 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2958 2959 /* update status */ 2960 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2961 data |= 
(2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2962 if(default_data != data) 2963 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2964 } else { 2965 /* restore GFXIP control over GCPG */ 2966 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2967 if(default_data != data) 2968 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2969 } 2970 } 2971 2972 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2973 { 2974 uint32_t data = 0; 2975 2976 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2977 AMD_PG_SUPPORT_GFX_SMG | 2978 AMD_PG_SUPPORT_GFX_DMG)) { 2979 /* init IDLE_POLL_COUNT = 60 */ 2980 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2981 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2982 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2983 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2984 2985 /* init RLC PG Delay */ 2986 data = 0; 2987 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2988 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2989 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2990 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2991 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2992 2993 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2994 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2995 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2996 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2997 2998 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2999 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 3000 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 3001 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 3002 3003 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 3004 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 3005 3006 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 3007 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 3008 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 3009 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0)) 3010 pwr_10_0_gfxip_control_over_cgpg(adev, true); 3011 } 3012 } 3013 3014 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 3015 bool enable) 3016 { 3017 uint32_t data = 0; 3018 uint32_t default_data = 0; 3019 3020 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3021 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3022 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 3023 enable ? 1 : 0); 3024 if (default_data != data) 3025 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3026 } 3027 3028 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 3029 bool enable) 3030 { 3031 uint32_t data = 0; 3032 uint32_t default_data = 0; 3033 3034 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3035 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3036 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 3037 enable ? 1 : 0); 3038 if(default_data != data) 3039 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3040 } 3041 3042 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 3043 bool enable) 3044 { 3045 uint32_t data = 0; 3046 uint32_t default_data = 0; 3047 3048 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3049 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3050 CP_PG_DISABLE, 3051 enable ? 
0 : 1); 3052 if(default_data != data) 3053 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3054 } 3055 3056 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 3057 bool enable) 3058 { 3059 uint32_t data, default_data; 3060 3061 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3062 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3063 GFX_POWER_GATING_ENABLE, 3064 enable ? 1 : 0); 3065 if(default_data != data) 3066 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3067 } 3068 3069 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 3070 bool enable) 3071 { 3072 uint32_t data, default_data; 3073 3074 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3075 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3076 GFX_PIPELINE_PG_ENABLE, 3077 enable ? 1 : 0); 3078 if(default_data != data) 3079 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3080 3081 if (!enable) 3082 /* read any GFX register to wake up GFX */ 3083 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 3084 } 3085 3086 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 3087 bool enable) 3088 { 3089 uint32_t data, default_data; 3090 3091 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3092 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3093 STATIC_PER_CU_PG_ENABLE, 3094 enable ? 1 : 0); 3095 if(default_data != data) 3096 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3097 } 3098 3099 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 3100 bool enable) 3101 { 3102 uint32_t data, default_data; 3103 3104 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3105 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3106 DYN_PER_CU_PG_ENABLE, 3107 enable ? 1 : 0); 3108 if(default_data != data) 3109 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3110 } 3111 3112 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 3113 { 3114 gfx_v9_0_init_csb(adev); 3115 3116 /* 3117 * Rlc save restore list is workable since v2_1. 3118 * And it's needed by gfxoff feature. 
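	 * The save/restore list tells the RLC which GC registers it must save
	 * and restore when the GFX block is power gated (e.g. for gfxoff).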
3119 */ 3120 if (adev->gfx.rlc.is_rlc_v2_1) { 3121 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) || 3122 (adev->apu_flags & AMD_APU_IS_RAVEN2)) 3123 gfx_v9_1_init_rlc_save_restore_list(adev); 3124 gfx_v9_0_enable_save_restore_machine(adev); 3125 } 3126 3127 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 3128 AMD_PG_SUPPORT_GFX_SMG | 3129 AMD_PG_SUPPORT_GFX_DMG | 3130 AMD_PG_SUPPORT_CP | 3131 AMD_PG_SUPPORT_GDS | 3132 AMD_PG_SUPPORT_RLC_SMU_HS)) { 3133 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, 3134 adev->gfx.rlc.cp_table_gpu_addr >> 8); 3135 gfx_v9_0_init_gfx_power_gating(adev); 3136 } 3137 } 3138 3139 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 3140 { 3141 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 3142 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3143 gfx_v9_0_wait_for_rlc_serdes(adev); 3144 } 3145 3146 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 3147 { 3148 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3149 udelay(50); 3150 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 3151 udelay(50); 3152 } 3153 3154 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 3155 { 3156 #ifdef AMDGPU_RLC_DEBUG_RETRY 3157 u32 rlc_ucode_ver; 3158 #endif 3159 3160 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 3161 udelay(50); 3162 3163 /* carrizo do enable cp interrupt after cp inited */ 3164 if (!(adev->flags & AMD_IS_APU)) { 3165 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3166 udelay(50); 3167 } 3168 3169 #ifdef AMDGPU_RLC_DEBUG_RETRY 3170 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 3171 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 3172 if(rlc_ucode_ver == 0x108) { 3173 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 3174 rlc_ucode_ver, adev->gfx.rlc_fw_version); 3175 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 3176 * default is 0x9C4 to create a 100us interval */ 3177 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 3178 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 3179 * to disable the page fault retry interrupts, default is 3180 * 0x100 (256) */ 3181 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 3182 } 3183 #endif 3184 } 3185 3186 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 3187 { 3188 const struct rlc_firmware_header_v2_0 *hdr; 3189 const __le32 *fw_data; 3190 unsigned i, fw_size; 3191 3192 if (!adev->gfx.rlc_fw) 3193 return -EINVAL; 3194 3195 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 3196 amdgpu_ucode_print_rlc_hdr(&hdr->header); 3197 3198 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 3199 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 3200 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 3201 3202 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 3203 RLCG_UCODE_LOADING_START_ADDRESS); 3204 for (i = 0; i < fw_size; i++) 3205 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 3206 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 3207 3208 return 0; 3209 } 3210 3211 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 3212 { 3213 int r; 3214 3215 if (amdgpu_sriov_vf(adev)) { 3216 gfx_v9_0_init_csb(adev); 3217 return 0; 3218 } 3219 3220 adev->gfx.rlc.funcs->stop(adev); 3221 3222 /* disable CG */ 3223 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 3224 3225 gfx_v9_0_init_pg(adev); 3226 3227 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3228 /* legacy rlc firmware loading */ 3229 r = gfx_v9_0_rlc_load_microcode(adev); 
3230 if (r) 3231 return r; 3232 } 3233 3234 switch (adev->ip_versions[GC_HWIP][0]) { 3235 case IP_VERSION(9, 2, 2): 3236 case IP_VERSION(9, 1, 0): 3237 if (amdgpu_lbpw == 0) 3238 gfx_v9_0_enable_lbpw(adev, false); 3239 else 3240 gfx_v9_0_enable_lbpw(adev, true); 3241 break; 3242 case IP_VERSION(9, 4, 0): 3243 if (amdgpu_lbpw > 0) 3244 gfx_v9_0_enable_lbpw(adev, true); 3245 else 3246 gfx_v9_0_enable_lbpw(adev, false); 3247 break; 3248 default: 3249 break; 3250 } 3251 3252 adev->gfx.rlc.funcs->start(adev); 3253 3254 return 0; 3255 } 3256 3257 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3258 { 3259 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3260 3261 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3262 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3263 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 3264 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3265 udelay(50); 3266 } 3267 3268 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3269 { 3270 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3271 const struct gfx_firmware_header_v1_0 *ce_hdr; 3272 const struct gfx_firmware_header_v1_0 *me_hdr; 3273 const __le32 *fw_data; 3274 unsigned i, fw_size; 3275 3276 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3277 return -EINVAL; 3278 3279 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3280 adev->gfx.pfp_fw->data; 3281 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3282 adev->gfx.ce_fw->data; 3283 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3284 adev->gfx.me_fw->data; 3285 3286 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3287 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3288 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3289 3290 gfx_v9_0_cp_gfx_enable(adev, false); 3291 3292 /* PFP */ 3293 fw_data = (const __le32 *) 3294 (adev->gfx.pfp_fw->data + 3295 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3296 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3297 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3298 for (i = 0; i < fw_size; i++) 3299 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3300 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3301 3302 /* CE */ 3303 fw_data = (const __le32 *) 3304 (adev->gfx.ce_fw->data + 3305 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3306 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3307 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3308 for (i = 0; i < fw_size; i++) 3309 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3310 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3311 3312 /* ME */ 3313 fw_data = (const __le32 *) 3314 (adev->gfx.me_fw->data + 3315 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3316 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3317 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3318 for (i = 0; i < fw_size; i++) 3319 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3320 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3321 3322 return 0; 3323 } 3324 3325 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3326 { 3327 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3328 const struct cs_section_def *sect = NULL; 3329 const struct cs_extent_def *ext = NULL; 3330 int r, i, tmp; 3331 3332 /* init the CP */ 3333 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3334 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 3335 3336 
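	/* un-halt PFP/CE/ME, then emit the clear-state preamble on the gfx ring below */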
gfx_v9_0_cp_gfx_enable(adev, true); 3337 3338 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 3339 if (r) { 3340 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3341 return r; 3342 } 3343 3344 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3345 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3346 3347 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3348 amdgpu_ring_write(ring, 0x80000000); 3349 amdgpu_ring_write(ring, 0x80000000); 3350 3351 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 3352 for (ext = sect->section; ext->extent != NULL; ++ext) { 3353 if (sect->id == SECT_CONTEXT) { 3354 amdgpu_ring_write(ring, 3355 PACKET3(PACKET3_SET_CONTEXT_REG, 3356 ext->reg_count)); 3357 amdgpu_ring_write(ring, 3358 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 3359 for (i = 0; i < ext->reg_count; i++) 3360 amdgpu_ring_write(ring, ext->extent[i]); 3361 } 3362 } 3363 } 3364 3365 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3366 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3367 3368 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3369 amdgpu_ring_write(ring, 0); 3370 3371 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3372 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3373 amdgpu_ring_write(ring, 0x8000); 3374 amdgpu_ring_write(ring, 0x8000); 3375 3376 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 3377 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 3378 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 3379 amdgpu_ring_write(ring, tmp); 3380 amdgpu_ring_write(ring, 0); 3381 3382 amdgpu_ring_commit(ring); 3383 3384 return 0; 3385 } 3386 3387 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 3388 { 3389 struct amdgpu_ring *ring; 3390 u32 tmp; 3391 u32 rb_bufsz; 3392 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3393 3394 /* Set the write pointer delay */ 3395 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 3396 3397 /* set the RB to use vmid 0 */ 3398 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 3399 3400 /* Set ring buffer size */ 3401 ring = &adev->gfx.gfx_ring[0]; 3402 rb_bufsz = order_base_2(ring->ring_size / 8); 3403 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3404 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3405 #ifdef __BIG_ENDIAN 3406 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 3407 #endif 3408 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3409 3410 /* Initialize the ring buffer's write pointers */ 3411 ring->wptr = 0; 3412 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3413 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3414 3415 /* set the wb address wether it's enabled or not */ 3416 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3417 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3418 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3419 3420 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3421 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 3422 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 3423 3424 mdelay(1); 3425 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3426 3427 rb_addr = ring->gpu_addr >> 8; 3428 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 3429 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3430 3431 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3432 if 
(ring->use_doorbell) { 3433 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3434 DOORBELL_OFFSET, ring->doorbell_index); 3435 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3436 DOORBELL_EN, 1); 3437 } else { 3438 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3439 } 3440 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3441 3442 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3443 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3444 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3445 3446 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3447 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3448 3449 3450 /* start the ring */ 3451 gfx_v9_0_cp_gfx_start(adev); 3452 ring->sched.ready = true; 3453 3454 return 0; 3455 } 3456 3457 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3458 { 3459 if (enable) { 3460 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3461 } else { 3462 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3463 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3464 adev->gfx.kiq.ring.sched.ready = false; 3465 } 3466 udelay(50); 3467 } 3468 3469 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3470 { 3471 const struct gfx_firmware_header_v1_0 *mec_hdr; 3472 const __le32 *fw_data; 3473 unsigned i; 3474 u32 tmp; 3475 3476 if (!adev->gfx.mec_fw) 3477 return -EINVAL; 3478 3479 gfx_v9_0_cp_compute_enable(adev, false); 3480 3481 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3482 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3483 3484 fw_data = (const __le32 *) 3485 (adev->gfx.mec_fw->data + 3486 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3487 tmp = 0; 3488 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3489 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3490 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3491 3492 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3493 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3494 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3495 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3496 3497 /* MEC1 */ 3498 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3499 mec_hdr->jt_offset); 3500 for (i = 0; i < mec_hdr->jt_size; i++) 3501 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3502 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3503 3504 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3505 adev->gfx.mec_fw_version); 3506 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. 
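	 * (MEC1 and MEC2 currently use the same microcode image, so only the
	 * MEC1 jump table is loaded above.)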
*/ 3507 3508 return 0; 3509 } 3510 3511 /* KIQ functions */ 3512 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3513 { 3514 uint32_t tmp; 3515 struct amdgpu_device *adev = ring->adev; 3516 3517 /* tell RLC which is KIQ queue */ 3518 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3519 tmp &= 0xffffff00; 3520 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3521 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3522 tmp |= 0x80; 3523 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3524 } 3525 3526 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) 3527 { 3528 struct amdgpu_device *adev = ring->adev; 3529 3530 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 3531 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { 3532 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; 3533 mqd->cp_hqd_queue_priority = 3534 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; 3535 } 3536 } 3537 } 3538 3539 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3540 { 3541 struct amdgpu_device *adev = ring->adev; 3542 struct v9_mqd *mqd = ring->mqd_ptr; 3543 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3544 uint32_t tmp; 3545 3546 mqd->header = 0xC0310800; 3547 mqd->compute_pipelinestat_enable = 0x00000001; 3548 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3549 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3550 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3551 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3552 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3553 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3554 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3555 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3556 mqd->compute_misc_reserved = 0x00000003; 3557 3558 mqd->dynamic_cu_mask_addr_lo = 3559 lower_32_bits(ring->mqd_gpu_addr 3560 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3561 mqd->dynamic_cu_mask_addr_hi = 3562 upper_32_bits(ring->mqd_gpu_addr 3563 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3564 3565 eop_base_addr = ring->eop_gpu_addr >> 8; 3566 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3567 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3568 3569 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3570 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3571 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3572 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3573 3574 mqd->cp_hqd_eop_control = tmp; 3575 3576 /* enable doorbell? 
*/ 3577 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3578 3579 if (ring->use_doorbell) { 3580 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3581 DOORBELL_OFFSET, ring->doorbell_index); 3582 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3583 DOORBELL_EN, 1); 3584 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3585 DOORBELL_SOURCE, 0); 3586 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3587 DOORBELL_HIT, 0); 3588 } else { 3589 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3590 DOORBELL_EN, 0); 3591 } 3592 3593 mqd->cp_hqd_pq_doorbell_control = tmp; 3594 3595 /* disable the queue if it's active */ 3596 ring->wptr = 0; 3597 mqd->cp_hqd_dequeue_request = 0; 3598 mqd->cp_hqd_pq_rptr = 0; 3599 mqd->cp_hqd_pq_wptr_lo = 0; 3600 mqd->cp_hqd_pq_wptr_hi = 0; 3601 3602 /* set the pointer to the MQD */ 3603 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3604 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3605 3606 /* set MQD vmid to 0 */ 3607 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3608 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3609 mqd->cp_mqd_control = tmp; 3610 3611 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3612 hqd_gpu_addr = ring->gpu_addr >> 8; 3613 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3614 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3615 3616 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3617 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3618 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3619 (order_base_2(ring->ring_size / 4) - 1)); 3620 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3621 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3622 #ifdef __BIG_ENDIAN 3623 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3624 #endif 3625 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3626 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3627 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3628 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3629 mqd->cp_hqd_pq_control = tmp; 3630 3631 /* set the wb address whether it's enabled or not */ 3632 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3633 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3634 mqd->cp_hqd_pq_rptr_report_addr_hi = 3635 upper_32_bits(wb_gpu_addr) & 0xffff; 3636 3637 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3638 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3639 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3640 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3641 3642 tmp = 0; 3643 /* enable the doorbell if requested */ 3644 if (ring->use_doorbell) { 3645 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3646 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3647 DOORBELL_OFFSET, ring->doorbell_index); 3648 3649 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3650 DOORBELL_EN, 1); 3651 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3652 DOORBELL_SOURCE, 0); 3653 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3654 DOORBELL_HIT, 0); 3655 } 3656 3657 mqd->cp_hqd_pq_doorbell_control = tmp; 3658 3659 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3660 ring->wptr = 0; 3661 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3662 3663 /* set the vmid for the queue */ 3664 mqd->cp_hqd_vmid = 0; 3665 3666 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3667 
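	/* keep the current CP_HQD_PERSISTENT_STATE value and only override PRELOAD_SIZE */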
tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3668 mqd->cp_hqd_persistent_state = tmp; 3669 3670 /* set MIN_IB_AVAIL_SIZE */ 3671 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3672 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3673 mqd->cp_hqd_ib_control = tmp; 3674 3675 /* set static priority for a queue/ring */ 3676 gfx_v9_0_mqd_set_priority(ring, mqd); 3677 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM); 3678 3679 /* map_queues packet doesn't need activate the queue, 3680 * so only kiq need set this field. 3681 */ 3682 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 3683 mqd->cp_hqd_active = 1; 3684 3685 return 0; 3686 } 3687 3688 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3689 { 3690 struct amdgpu_device *adev = ring->adev; 3691 struct v9_mqd *mqd = ring->mqd_ptr; 3692 int j; 3693 3694 /* disable wptr polling */ 3695 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3696 3697 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3698 mqd->cp_hqd_eop_base_addr_lo); 3699 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3700 mqd->cp_hqd_eop_base_addr_hi); 3701 3702 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3703 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3704 mqd->cp_hqd_eop_control); 3705 3706 /* enable doorbell? */ 3707 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3708 mqd->cp_hqd_pq_doorbell_control); 3709 3710 /* disable the queue if it's active */ 3711 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3712 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3713 for (j = 0; j < adev->usec_timeout; j++) { 3714 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3715 break; 3716 udelay(1); 3717 } 3718 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3719 mqd->cp_hqd_dequeue_request); 3720 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3721 mqd->cp_hqd_pq_rptr); 3722 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3723 mqd->cp_hqd_pq_wptr_lo); 3724 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3725 mqd->cp_hqd_pq_wptr_hi); 3726 } 3727 3728 /* set the pointer to the MQD */ 3729 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3730 mqd->cp_mqd_base_addr_lo); 3731 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3732 mqd->cp_mqd_base_addr_hi); 3733 3734 /* set MQD vmid to 0 */ 3735 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3736 mqd->cp_mqd_control); 3737 3738 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3739 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3740 mqd->cp_hqd_pq_base_lo); 3741 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3742 mqd->cp_hqd_pq_base_hi); 3743 3744 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3745 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3746 mqd->cp_hqd_pq_control); 3747 3748 /* set the wb address whether it's enabled or not */ 3749 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3750 mqd->cp_hqd_pq_rptr_report_addr_lo); 3751 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3752 mqd->cp_hqd_pq_rptr_report_addr_hi); 3753 3754 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3755 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3756 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3757 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3758 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3759 3760 /* enable the doorbell if requested */ 3761 if (ring->use_doorbell) { 3762 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3763 (adev->doorbell_index.kiq * 2) << 2); 3764 /* If GC has entered CGPG, ringing doorbell > first page 3765 * 
doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to 3766 * workaround this issue. And this change has to align with firmware 3767 * update. 3768 */ 3769 if (check_if_enlarge_doorbell_range(adev)) 3770 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3771 (adev->doorbell.size - 4)); 3772 else 3773 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3774 (adev->doorbell_index.userqueue_end * 2) << 2); 3775 } 3776 3777 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3778 mqd->cp_hqd_pq_doorbell_control); 3779 3780 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3781 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3782 mqd->cp_hqd_pq_wptr_lo); 3783 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3784 mqd->cp_hqd_pq_wptr_hi); 3785 3786 /* set the vmid for the queue */ 3787 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3788 3789 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3790 mqd->cp_hqd_persistent_state); 3791 3792 /* activate the queue */ 3793 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3794 mqd->cp_hqd_active); 3795 3796 if (ring->use_doorbell) 3797 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3798 3799 return 0; 3800 } 3801 3802 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3803 { 3804 struct amdgpu_device *adev = ring->adev; 3805 int j; 3806 3807 /* disable the queue if it's active */ 3808 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3809 3810 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3811 3812 for (j = 0; j < adev->usec_timeout; j++) { 3813 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3814 break; 3815 udelay(1); 3816 } 3817 3818 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3819 DRM_DEBUG("KIQ dequeue request failed.\n"); 3820 3821 /* Manual disable if dequeue request times out */ 3822 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3823 } 3824 3825 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3826 0); 3827 } 3828 3829 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3830 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3831 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3832 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3833 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3834 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3835 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3836 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3837 3838 return 0; 3839 } 3840 3841 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3842 { 3843 struct amdgpu_device *adev = ring->adev; 3844 struct v9_mqd *mqd = ring->mqd_ptr; 3845 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3846 struct v9_mqd *tmp_mqd; 3847 3848 gfx_v9_0_kiq_setting(ring); 3849 3850 /* GPU could be in bad state during probe, driver trigger the reset 3851 * after load the SMU, in this case , the mqd is not be initialized. 3852 * driver need to re-init the mqd. 
3853 * check mqd->cp_hqd_pq_control since this value should not be 0 3854 */ 3855 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; 3856 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){ 3857 /* for GPU_RESET case , reset MQD to a clean status */ 3858 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3859 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3860 3861 /* reset ring buffer */ 3862 ring->wptr = 0; 3863 amdgpu_ring_clear_ring(ring); 3864 3865 mutex_lock(&adev->srbm_mutex); 3866 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3867 gfx_v9_0_kiq_init_register(ring); 3868 soc15_grbm_select(adev, 0, 0, 0, 0); 3869 mutex_unlock(&adev->srbm_mutex); 3870 } else { 3871 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3872 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3873 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3874 mutex_lock(&adev->srbm_mutex); 3875 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3876 gfx_v9_0_mqd_init(ring); 3877 gfx_v9_0_kiq_init_register(ring); 3878 soc15_grbm_select(adev, 0, 0, 0, 0); 3879 mutex_unlock(&adev->srbm_mutex); 3880 3881 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3882 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3883 } 3884 3885 return 0; 3886 } 3887 3888 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) 3889 { 3890 struct amdgpu_device *adev = ring->adev; 3891 struct v9_mqd *mqd = ring->mqd_ptr; 3892 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3893 struct v9_mqd *tmp_mqd; 3894 3895 /* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control 3896 * is not be initialized before 3897 */ 3898 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; 3899 3900 if (!tmp_mqd->cp_hqd_pq_control || 3901 (!amdgpu_in_reset(adev) && !adev->in_suspend)) { 3902 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3903 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3904 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3905 mutex_lock(&adev->srbm_mutex); 3906 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3907 gfx_v9_0_mqd_init(ring); 3908 soc15_grbm_select(adev, 0, 0, 0, 0); 3909 mutex_unlock(&adev->srbm_mutex); 3910 3911 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3912 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3913 } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 3914 /* reset MQD to a clean status */ 3915 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3916 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3917 3918 /* reset ring buffer */ 3919 ring->wptr = 0; 3920 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0); 3921 amdgpu_ring_clear_ring(ring); 3922 } else { 3923 amdgpu_ring_clear_ring(ring); 3924 } 3925 3926 return 0; 3927 } 3928 3929 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3930 { 3931 struct amdgpu_ring *ring; 3932 int r; 3933 3934 ring = &adev->gfx.kiq.ring; 3935 3936 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3937 if (unlikely(r != 0)) 3938 return r; 3939 3940 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3941 if (unlikely(r != 0)) 3942 return r; 3943 3944 gfx_v9_0_kiq_init_queue(ring); 3945 amdgpu_bo_kunmap(ring->mqd_obj); 3946 ring->mqd_ptr = NULL; 3947 amdgpu_bo_unreserve(ring->mqd_obj); 3948 ring->sched.ready = true; 3949 return 0; 3950 } 3951 3952 static int gfx_v9_0_kcq_resume(struct 
amdgpu_device *adev) 3953 { 3954 struct amdgpu_ring *ring = NULL; 3955 int r = 0, i; 3956 3957 gfx_v9_0_cp_compute_enable(adev, true); 3958 3959 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3960 ring = &adev->gfx.compute_ring[i]; 3961 3962 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3963 if (unlikely(r != 0)) 3964 goto done; 3965 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3966 if (!r) { 3967 r = gfx_v9_0_kcq_init_queue(ring); 3968 amdgpu_bo_kunmap(ring->mqd_obj); 3969 ring->mqd_ptr = NULL; 3970 } 3971 amdgpu_bo_unreserve(ring->mqd_obj); 3972 if (r) 3973 goto done; 3974 } 3975 3976 r = amdgpu_gfx_enable_kcq(adev); 3977 done: 3978 return r; 3979 } 3980 3981 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3982 { 3983 int r, i; 3984 struct amdgpu_ring *ring; 3985 3986 if (!(adev->flags & AMD_IS_APU)) 3987 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3988 3989 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3990 if (adev->gfx.num_gfx_rings) { 3991 /* legacy firmware loading */ 3992 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3993 if (r) 3994 return r; 3995 } 3996 3997 r = gfx_v9_0_cp_compute_load_microcode(adev); 3998 if (r) 3999 return r; 4000 } 4001 4002 r = gfx_v9_0_kiq_resume(adev); 4003 if (r) 4004 return r; 4005 4006 if (adev->gfx.num_gfx_rings) { 4007 r = gfx_v9_0_cp_gfx_resume(adev); 4008 if (r) 4009 return r; 4010 } 4011 4012 r = gfx_v9_0_kcq_resume(adev); 4013 if (r) 4014 return r; 4015 4016 if (adev->gfx.num_gfx_rings) { 4017 ring = &adev->gfx.gfx_ring[0]; 4018 r = amdgpu_ring_test_helper(ring); 4019 if (r) 4020 return r; 4021 } 4022 4023 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4024 ring = &adev->gfx.compute_ring[i]; 4025 amdgpu_ring_test_helper(ring); 4026 } 4027 4028 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 4029 4030 return 0; 4031 } 4032 4033 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) 4034 { 4035 u32 tmp; 4036 4037 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) && 4038 adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)) 4039 return; 4040 4041 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); 4042 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, 4043 adev->df.hash_status.hash_64k); 4044 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, 4045 adev->df.hash_status.hash_2m); 4046 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, 4047 adev->df.hash_status.hash_1g); 4048 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); 4049 } 4050 4051 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 4052 { 4053 if (adev->gfx.num_gfx_rings) 4054 gfx_v9_0_cp_gfx_enable(adev, enable); 4055 gfx_v9_0_cp_compute_enable(adev, enable); 4056 } 4057 4058 static int gfx_v9_0_hw_init(void *handle) 4059 { 4060 int r; 4061 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4062 4063 if (!amdgpu_sriov_vf(adev)) 4064 gfx_v9_0_init_golden_registers(adev); 4065 4066 gfx_v9_0_constants_init(adev); 4067 4068 gfx_v9_0_init_tcp_config(adev); 4069 4070 r = adev->gfx.rlc.funcs->resume(adev); 4071 if (r) 4072 return r; 4073 4074 r = gfx_v9_0_cp_resume(adev); 4075 if (r) 4076 return r; 4077 4078 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) 4079 gfx_v9_4_2_set_power_brake_sequence(adev); 4080 4081 return r; 4082 } 4083 4084 static int gfx_v9_0_hw_fini(void *handle) 4085 { 4086 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4087 4088 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 4089 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4090 amdgpu_irq_put(adev, 
		      &adev->gfx.priv_inst_irq, 0);

	/* When a RAS fatal error has been triggered, DF is frozen and the KCQ
	 * disable request would fail, so skip it in that case.
	 */
	if (!amdgpu_ras_intr_triggered())
		/* disable KCQ so the CPC stops touching memory that is no longer valid */
		amdgpu_gfx_disable_kcq(adev);

	if (amdgpu_sriov_vf(adev)) {
		gfx_v9_0_cp_gfx_enable(adev, false);
		/* Wptr polling must be disabled for SRIOV once the hardware is
		 * finished; otherwise the CPC engine may keep fetching a WB
		 * address that is no longer valid after software teardown and
		 * trigger DMAR read errors on the hypervisor side.
		 */
		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		return 0;
	}

	/* Use the deinitialize sequence from CAIL when unbinding the device
	 * from the driver; otherwise the KIQ hangs when the device is bound
	 * back.
	 */
	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
				  adev->gfx.kiq.ring.pipe,
				  adev->gfx.kiq.ring.queue, 0);
		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
		soc15_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	}

	gfx_v9_0_cp_enable(adev, false);

	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
	    (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
		dev_dbg(adev->dev, "Skipping RLC halt\n");
		return 0;
	}

	adev->gfx.rlc.funcs->stop(adev);
	return 0;
}

static int gfx_v9_0_suspend(void *handle)
{
	return gfx_v9_0_hw_fini(handle);
}

static int gfx_v9_0_resume(void *handle)
{
	return gfx_v9_0_hw_init(handle);
}

static bool gfx_v9_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
			  GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

static int gfx_v9_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v9_0_is_idle(handle))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

static int gfx_v9_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS */
	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

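	/* only pulse GRBM_SOFT_RESET when one of the checks above flagged a busy block */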
4200 if (grbm_soft_reset) { 4201 /* stop the rlc */ 4202 adev->gfx.rlc.funcs->stop(adev); 4203 4204 if (adev->gfx.num_gfx_rings) 4205 /* Disable GFX parsing/prefetching */ 4206 gfx_v9_0_cp_gfx_enable(adev, false); 4207 4208 /* Disable MEC parsing/prefetching */ 4209 gfx_v9_0_cp_compute_enable(adev, false); 4210 4211 if (grbm_soft_reset) { 4212 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4213 tmp |= grbm_soft_reset; 4214 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4215 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4216 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4217 4218 udelay(50); 4219 4220 tmp &= ~grbm_soft_reset; 4221 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4222 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4223 } 4224 4225 /* Wait a little for things to settle down */ 4226 udelay(50); 4227 } 4228 return 0; 4229 } 4230 4231 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) 4232 { 4233 signed long r, cnt = 0; 4234 unsigned long flags; 4235 uint32_t seq, reg_val_offs = 0; 4236 uint64_t value = 0; 4237 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 4238 struct amdgpu_ring *ring = &kiq->ring; 4239 4240 BUG_ON(!ring->funcs->emit_rreg); 4241 4242 spin_lock_irqsave(&kiq->ring_lock, flags); 4243 if (amdgpu_device_wb_get(adev, ®_val_offs)) { 4244 pr_err("critical bug! too many kiq readers\n"); 4245 goto failed_unlock; 4246 } 4247 amdgpu_ring_alloc(ring, 32); 4248 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 4249 amdgpu_ring_write(ring, 9 | /* src: register*/ 4250 (5 << 8) | /* dst: memory */ 4251 (1 << 16) | /* count sel */ 4252 (1 << 20)); /* write confirm */ 4253 amdgpu_ring_write(ring, 0); 4254 amdgpu_ring_write(ring, 0); 4255 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 4256 reg_val_offs * 4)); 4257 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 4258 reg_val_offs * 4)); 4259 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); 4260 if (r) 4261 goto failed_undo; 4262 4263 amdgpu_ring_commit(ring); 4264 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4265 4266 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4267 4268 /* don't wait anymore for gpu reset case because this way may 4269 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg 4270 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will 4271 * never return if we keep waiting in virt_kiq_rreg, which cause 4272 * gpu_recover() hang there. 
4273 * 4274 * also don't wait anymore for IRQ context 4275 * */ 4276 if (r < 1 && (amdgpu_in_reset(adev))) 4277 goto failed_kiq_read; 4278 4279 might_sleep(); 4280 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 4281 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 4282 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4283 } 4284 4285 if (cnt > MAX_KIQ_REG_TRY) 4286 goto failed_kiq_read; 4287 4288 mb(); 4289 value = (uint64_t)adev->wb.wb[reg_val_offs] | 4290 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL; 4291 amdgpu_device_wb_free(adev, reg_val_offs); 4292 return value; 4293 4294 failed_undo: 4295 amdgpu_ring_undo(ring); 4296 failed_unlock: 4297 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4298 failed_kiq_read: 4299 if (reg_val_offs) 4300 amdgpu_device_wb_free(adev, reg_val_offs); 4301 pr_err("failed to read gpu clock\n"); 4302 return ~0; 4303 } 4304 4305 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4306 { 4307 uint64_t clock, clock_lo, clock_hi, hi_check; 4308 4309 switch (adev->ip_versions[GC_HWIP][0]) { 4310 case IP_VERSION(9, 3, 0): 4311 preempt_disable(); 4312 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4313 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4314 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4315 /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over 4316 * roughly every 42 seconds. 4317 */ 4318 if (hi_check != clock_hi) { 4319 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4320 clock_hi = hi_check; 4321 } 4322 preempt_enable(); 4323 clock = clock_lo | (clock_hi << 32ULL); 4324 break; 4325 default: 4326 amdgpu_gfx_off_ctrl(adev, false); 4327 mutex_lock(&adev->gfx.gpu_clock_mutex); 4328 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) { 4329 clock = gfx_v9_0_kiq_read_clock(adev); 4330 } else { 4331 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4332 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4333 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4334 } 4335 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4336 amdgpu_gfx_off_ctrl(adev, true); 4337 break; 4338 } 4339 return clock; 4340 } 4341 4342 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4343 uint32_t vmid, 4344 uint32_t gds_base, uint32_t gds_size, 4345 uint32_t gws_base, uint32_t gws_size, 4346 uint32_t oa_base, uint32_t oa_size) 4347 { 4348 struct amdgpu_device *adev = ring->adev; 4349 4350 /* GDS Base */ 4351 gfx_v9_0_write_data_to_reg(ring, 0, false, 4352 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4353 gds_base); 4354 4355 /* GDS Size */ 4356 gfx_v9_0_write_data_to_reg(ring, 0, false, 4357 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4358 gds_size); 4359 4360 /* GWS */ 4361 gfx_v9_0_write_data_to_reg(ring, 0, false, 4362 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4363 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4364 4365 /* OA */ 4366 gfx_v9_0_write_data_to_reg(ring, 0, false, 4367 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4368 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4369 } 4370 4371 static const u32 vgpr_init_compute_shader[] = 4372 { 4373 0xb07c0000, 0xbe8000ff, 4374 0x000000f8, 0xbf110800, 4375 0x7e000280, 0x7e020280, 4376 0x7e040280, 0x7e060280, 4377 0x7e080280, 0x7e0a0280, 4378 0x7e0c0280, 0x7e0e0280, 4379 0x80808800, 0xbe803200, 4380 0xbf84fff5, 0xbf9c0000, 4381 0xd28c0001, 0x0001007f, 4382 
0xd28d0001, 0x0002027e, 4383 0x10020288, 0xb8810904, 4384 0xb7814000, 0xd1196a01, 4385 0x00000301, 0xbe800087, 4386 0xbefc00c1, 0xd89c4000, 4387 0x00020201, 0xd89cc080, 4388 0x00040401, 0x320202ff, 4389 0x00000800, 0x80808100, 4390 0xbf84fff8, 0x7e020280, 4391 0xbf810000, 0x00000000, 4392 }; 4393 4394 static const u32 sgpr_init_compute_shader[] = 4395 { 4396 0xb07c0000, 0xbe8000ff, 4397 0x0000005f, 0xbee50080, 4398 0xbe812c65, 0xbe822c65, 4399 0xbe832c65, 0xbe842c65, 4400 0xbe852c65, 0xb77c0005, 4401 0x80808500, 0xbf84fff8, 4402 0xbe800080, 0xbf810000, 4403 }; 4404 4405 static const u32 vgpr_init_compute_shader_arcturus[] = { 4406 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 4407 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 4408 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 4409 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 4410 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 4411 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 4412 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 4413 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 4414 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 4415 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 4416 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 4417 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 4418 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 4419 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 4420 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 4421 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 4422 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 4423 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 4424 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 4425 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 4426 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 4427 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 4428 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 4429 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 4430 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 4431 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 4432 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 4433 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 4434 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 4435 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 4436 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 4437 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 4438 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 4439 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 4440 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 4441 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 4442 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 4443 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 
0x18000080, 4444 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 4445 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 4446 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 4447 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 4448 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 4449 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 4450 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 4451 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 4452 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 4453 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 4454 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 4455 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 4456 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 4457 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 4458 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 4459 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 4460 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 4461 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 4462 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 4463 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 4464 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 4465 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 4466 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 4467 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 4468 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 4469 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 4470 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 4471 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 4472 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 4473 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 4474 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 4475 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 4476 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 4477 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 4478 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 4479 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 4480 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 4481 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 4482 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 4483 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 4484 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 4485 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 4486 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 4487 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 4488 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 4489 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 
4490 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 4491 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 4492 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 4493 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 4494 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904, 4495 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 4496 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 4497 0xbf84fff8, 0xbf810000, 4498 }; 4499 4500 /* When below register arrays changed, please update gpr_reg_size, 4501 and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, 4502 to cover all gfx9 ASICs */ 4503 static const struct soc15_reg_entry vgpr_init_regs[] = { 4504 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4505 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4506 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4507 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4508 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, 4509 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4510 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4511 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4512 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4513 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4514 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4515 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4516 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4517 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4518 }; 4519 4520 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = { 4521 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4522 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4523 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4524 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4525 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf }, 4526 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4527 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4528 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4529 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4530 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4531 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4532 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4533 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4534 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4535 }; 4536 4537 static const struct soc15_reg_entry sgpr1_init_regs[] = { 4538 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4539 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4540 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4541 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4542 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4543 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4544 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, 4545 { SOC15_REG_ENTRY(GC, 0, 
mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, 4546 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, 4547 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, 4548 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, 4549 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, 4550 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, 4551 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, 4552 }; 4553 4554 static const struct soc15_reg_entry sgpr2_init_regs[] = { 4555 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4556 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4557 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4558 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4559 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4560 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4561 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, 4562 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, 4563 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, 4564 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, 4565 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, 4566 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, 4567 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, 4568 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, 4569 }; 4570 4571 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { 4572 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4573 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4574 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4575 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4576 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4577 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4578 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4579 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4580 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4581 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4582 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4583 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4584 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4585 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4586 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4587 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4588 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4589 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4590 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4591 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4592 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, 4593 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4594 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4595 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4596 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4597 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4598 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4599 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4600 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4601 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4602 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 
4603 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4604 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4605 }; 4606 4607 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4608 { 4609 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4610 int i, r; 4611 4612 /* only support when RAS is enabled */ 4613 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4614 return 0; 4615 4616 r = amdgpu_ring_alloc(ring, 7); 4617 if (r) { 4618 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4619 ring->name, r); 4620 return r; 4621 } 4622 4623 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4624 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4625 4626 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4627 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4628 PACKET3_DMA_DATA_DST_SEL(1) | 4629 PACKET3_DMA_DATA_SRC_SEL(2) | 4630 PACKET3_DMA_DATA_ENGINE(0))); 4631 amdgpu_ring_write(ring, 0); 4632 amdgpu_ring_write(ring, 0); 4633 amdgpu_ring_write(ring, 0); 4634 amdgpu_ring_write(ring, 0); 4635 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4636 adev->gds.gds_size); 4637 4638 amdgpu_ring_commit(ring); 4639 4640 for (i = 0; i < adev->usec_timeout; i++) { 4641 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4642 break; 4643 udelay(1); 4644 } 4645 4646 if (i >= adev->usec_timeout) 4647 r = -ETIMEDOUT; 4648 4649 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4650 4651 return r; 4652 } 4653 4654 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4655 { 4656 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4657 struct amdgpu_ib ib; 4658 struct dma_fence *f = NULL; 4659 int r, i; 4660 unsigned total_size, vgpr_offset, sgpr_offset; 4661 u64 gpu_addr; 4662 4663 int compute_dim_x = adev->gfx.config.max_shader_engines * 4664 adev->gfx.config.max_cu_per_sh * 4665 adev->gfx.config.max_sh_per_se; 4666 int sgpr_work_group_size = 5; 4667 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6; 4668 int vgpr_init_shader_size; 4669 const u32 *vgpr_init_shader_ptr; 4670 const struct soc15_reg_entry *vgpr_init_regs_ptr; 4671 4672 /* only support when RAS is enabled */ 4673 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4674 return 0; 4675 4676 /* bail if the compute ring is not ready */ 4677 if (!ring->sched.ready) 4678 return 0; 4679 4680 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) { 4681 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; 4682 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); 4683 vgpr_init_regs_ptr = vgpr_init_regs_arcturus; 4684 } else { 4685 vgpr_init_shader_ptr = vgpr_init_compute_shader; 4686 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader); 4687 vgpr_init_regs_ptr = vgpr_init_regs; 4688 } 4689 4690 total_size = 4691 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ 4692 total_size += 4693 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ 4694 total_size += 4695 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ 4696 total_size = ALIGN(total_size, 256); 4697 vgpr_offset = total_size; 4698 total_size += ALIGN(vgpr_init_shader_size, 256); 4699 sgpr_offset = total_size; 4700 total_size += sizeof(sgpr_init_compute_shader); 4701 4702 /* allocate an indirect buffer to put the commands in */ 4703 memset(&ib, 0, sizeof(ib)); 4704 r = amdgpu_ib_get(adev, NULL, total_size, 4705 AMDGPU_IB_POOL_DIRECT, &ib); 4706 if (r) { 4707 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4708 return r; 4709 } 4710 4711 /* load the compute shaders 
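(the VGPR and SGPR init shaders) into the IB at the vgpr_offset/sgpr_offset locations computed above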
*/ 4712 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++) 4713 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i]; 4714 4715 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4716 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4717 4718 /* init the ib length to 0 */ 4719 ib.length_dw = 0; 4720 4721 /* VGPR */ 4722 /* write the register state for the compute dispatch */ 4723 for (i = 0; i < gpr_reg_size; i++) { 4724 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4725 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i]) 4726 - PACKET3_SET_SH_REG_START; 4727 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value; 4728 } 4729 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4730 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4731 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4732 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4733 - PACKET3_SET_SH_REG_START; 4734 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4735 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4736 4737 /* write dispatch packet */ 4738 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4739 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */ 4740 ib.ptr[ib.length_dw++] = 1; /* y */ 4741 ib.ptr[ib.length_dw++] = 1; /* z */ 4742 ib.ptr[ib.length_dw++] = 4743 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4744 4745 /* write CS partial flush packet */ 4746 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4747 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4748 4749 /* SGPR1 */ 4750 /* write the register state for the compute dispatch */ 4751 for (i = 0; i < gpr_reg_size; i++) { 4752 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4753 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) 4754 - PACKET3_SET_SH_REG_START; 4755 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; 4756 } 4757 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4758 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4759 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4760 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4761 - PACKET3_SET_SH_REG_START; 4762 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4763 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4764 4765 /* write dispatch packet */ 4766 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4767 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4768 ib.ptr[ib.length_dw++] = 1; /* y */ 4769 ib.ptr[ib.length_dw++] = 1; /* z */ 4770 ib.ptr[ib.length_dw++] = 4771 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4772 4773 /* write CS partial flush packet */ 4774 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4775 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4776 4777 /* SGPR2 */ 4778 /* write the register state for the compute dispatch */ 4779 for (i = 0; i < gpr_reg_size; i++) { 4780 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4781 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) 4782 - PACKET3_SET_SH_REG_START; 4783 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; 4784 } 4785 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4786 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4787 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4788 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 
4789 - PACKET3_SET_SH_REG_START; 4790 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4791 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4792 4793 /* write dispatch packet */ 4794 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4795 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4796 ib.ptr[ib.length_dw++] = 1; /* y */ 4797 ib.ptr[ib.length_dw++] = 1; /* z */ 4798 ib.ptr[ib.length_dw++] = 4799 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4800 4801 /* write CS partial flush packet */ 4802 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4803 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4804 4805 /* schedule the ib on the ring */ 4806 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 4807 if (r) { 4808 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 4809 goto fail; 4810 } 4811 4812 /* wait for the GPU to finish processing the IB */ 4813 r = dma_fence_wait(f, false); 4814 if (r) { 4815 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 4816 goto fail; 4817 } 4818 4819 fail: 4820 amdgpu_ib_free(adev, &ib, NULL); 4821 dma_fence_put(f); 4822 4823 return r; 4824 } 4825 4826 static int gfx_v9_0_early_init(void *handle) 4827 { 4828 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4829 4830 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || 4831 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) 4832 adev->gfx.num_gfx_rings = 0; 4833 else 4834 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 4835 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 4836 AMDGPU_MAX_COMPUTE_RINGS); 4837 gfx_v9_0_set_kiq_pm4_funcs(adev); 4838 gfx_v9_0_set_ring_funcs(adev); 4839 gfx_v9_0_set_irq_funcs(adev); 4840 gfx_v9_0_set_gds_init(adev); 4841 gfx_v9_0_set_rlc_funcs(adev); 4842 4843 return 0; 4844 } 4845 4846 static int gfx_v9_0_ecc_late_init(void *handle) 4847 { 4848 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4849 int r; 4850 4851 /* 4852 * Temporary workaround: on several cards the CP firmware fails to 4853 * update the read pointer while CPDMA is writing the clearing operation 4854 * to GDS during the suspend/resume sequence. So just limit this 4855 * operation to the cold boot sequence.
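* Hence the GDS clear below is only issued when adev->in_suspend is not set.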
4856 */ 4857 if ((!adev->in_suspend) && 4858 (adev->gds.gds_size)) { 4859 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4860 if (r) 4861 return r; 4862 } 4863 4864 /* requires IBs so do in late init after IB pool is initialized */ 4865 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) 4866 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev); 4867 else 4868 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4869 4870 if (r) 4871 return r; 4872 4873 if (adev->gfx.ras_funcs && 4874 adev->gfx.ras_funcs->ras_late_init) { 4875 r = adev->gfx.ras_funcs->ras_late_init(adev); 4876 if (r) 4877 return r; 4878 } 4879 4880 if (adev->gfx.ras_funcs && 4881 adev->gfx.ras_funcs->enable_watchdog_timer) 4882 adev->gfx.ras_funcs->enable_watchdog_timer(adev); 4883 4884 return 0; 4885 } 4886 4887 static int gfx_v9_0_late_init(void *handle) 4888 { 4889 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4890 int r; 4891 4892 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4893 if (r) 4894 return r; 4895 4896 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4897 if (r) 4898 return r; 4899 4900 r = gfx_v9_0_ecc_late_init(handle); 4901 if (r) 4902 return r; 4903 4904 return 0; 4905 } 4906 4907 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4908 { 4909 uint32_t rlc_setting; 4910 4911 /* if RLC is not enabled, do nothing */ 4912 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4913 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4914 return false; 4915 4916 return true; 4917 } 4918 4919 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 4920 { 4921 uint32_t data; 4922 unsigned i; 4923 4924 data = RLC_SAFE_MODE__CMD_MASK; 4925 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4926 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4927 4928 /* wait for RLC_SAFE_MODE */ 4929 for (i = 0; i < adev->usec_timeout; i++) { 4930 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4931 break; 4932 udelay(1); 4933 } 4934 } 4935 4936 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 4937 { 4938 uint32_t data; 4939 4940 data = RLC_SAFE_MODE__CMD_MASK; 4941 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4942 } 4943 4944 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4945 bool enable) 4946 { 4947 amdgpu_gfx_rlc_enter_safe_mode(adev); 4948 4949 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4950 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4951 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4952 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4953 } else { 4954 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4955 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4956 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4957 } 4958 4959 amdgpu_gfx_rlc_exit_safe_mode(adev); 4960 } 4961 4962 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4963 bool enable) 4964 { 4965 /* TODO: double check if we need to perform under safe mode */ 4966 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4967 4968 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4969 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4970 else 4971 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4972 4973 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4974 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4975 else 4976 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4977 4978 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4979 } 4980 4981 static void 
gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4982 bool enable) 4983 { 4984 uint32_t data, def; 4985 4986 amdgpu_gfx_rlc_enter_safe_mode(adev); 4987 4988 /* It is disabled by HW by default */ 4989 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4990 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4991 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4992 4993 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1)) 4994 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4995 4996 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4997 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4998 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4999 5000 /* only for Vega10 & Raven1 */ 5001 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 5002 5003 if (def != data) 5004 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5005 5006 /* MGLS is a global flag to control all MGLS in GFX */ 5007 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5008 /* 2 - RLC memory Light sleep */ 5009 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 5010 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 5011 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5012 if (def != data) 5013 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 5014 } 5015 /* 3 - CP memory Light sleep */ 5016 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 5017 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 5018 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5019 if (def != data) 5020 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 5021 } 5022 } 5023 } else { 5024 /* 1 - MGCG_OVERRIDE */ 5025 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 5026 5027 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1)) 5028 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 5029 5030 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5031 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5032 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 5033 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 5034 5035 if (def != data) 5036 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5037 5038 /* 2 - disable MGLS in RLC */ 5039 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 5040 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5041 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5042 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 5043 } 5044 5045 /* 3 - disable MGLS in CP */ 5046 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 5047 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5048 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5049 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 5050 } 5051 } 5052 5053 amdgpu_gfx_rlc_exit_safe_mode(adev); 5054 } 5055 5056 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 5057 bool enable) 5058 { 5059 uint32_t data, def; 5060 5061 if (!adev->gfx.num_gfx_rings) 5062 return; 5063 5064 amdgpu_gfx_rlc_enter_safe_mode(adev); 5065 5066 /* Enable 3D CGCG/CGLS */ 5067 if (enable) { 5068 /* write cmd to clear cgcg/cgls ov */ 5069 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 5070 /* unset CGCG override */ 5071 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5072 /* update CGCG and CGLS override bits */ 5073 if (def != data) 5074 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5075 5076 /* enable 3Dcgcg FSM(0x0000363f) */ 5077 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 5078 5079 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 5080 data = (0x36 << 
RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5081 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5082 else 5083 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT; 5084 5085 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5086 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5087 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5088 if (def != data) 5089 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 5090 5091 /* set IDLE_POLL_COUNT(0x00900100) */ 5092 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5093 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5094 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5095 if (def != data) 5096 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5097 } else { 5098 /* Disable CGCG/CGLS */ 5099 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 5100 /* disable cgcg, cgls should be disabled */ 5101 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 5102 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 5103 /* disable cgcg and cgls in FSM */ 5104 if (def != data) 5105 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 5106 } 5107 5108 amdgpu_gfx_rlc_exit_safe_mode(adev); 5109 } 5110 5111 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5112 bool enable) 5113 { 5114 uint32_t def, data; 5115 5116 amdgpu_gfx_rlc_enter_safe_mode(adev); 5117 5118 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5119 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 5120 /* unset CGCG override */ 5121 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5122 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5123 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5124 else 5125 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5126 /* update CGCG and CGLS override bits */ 5127 if (def != data) 5128 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5129 5130 /* enable cgcg FSM(0x0000363F) */ 5131 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5132 5133 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) 5134 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5135 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5136 else 5137 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5138 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5139 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5140 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5141 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5142 if (def != data) 5143 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5144 5145 /* set IDLE_POLL_COUNT(0x00900100) */ 5146 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5147 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5148 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5149 if (def != data) 5150 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5151 } else { 5152 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5153 /* reset CGCG/CGLS bits */ 5154 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5155 /* disable cgcg and cgls in FSM */ 5156 if (def != data) 5157 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5158 } 5159 5160 amdgpu_gfx_rlc_exit_safe_mode(adev); 5161 } 5162 5163 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5164 bool enable) 5165 { 5166 if (enable) { 5167 /* CGCG/CGLS should be enabled after MGCG/MGLS 5168 * === MGCG + MGLS === 5169 */ 5170 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5171 /* === CGCG 
/CGLS for GFX 3D Only === */ 5172 gfx_v9_0_update_3d_clock_gating(adev, enable); 5173 /* === CGCG + CGLS === */ 5174 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5175 } else { 5176 /* CGCG/CGLS should be disabled before MGCG/MGLS 5177 * === CGCG + CGLS === 5178 */ 5179 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5180 /* === CGCG /CGLS for GFX 3D Only === */ 5181 gfx_v9_0_update_3d_clock_gating(adev, enable); 5182 /* === MGCG + MGLS === */ 5183 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5184 } 5185 return 0; 5186 } 5187 5188 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) 5189 { 5190 u32 reg, data; 5191 5192 amdgpu_gfx_off_ctrl(adev, false); 5193 5194 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); 5195 if (amdgpu_sriov_is_pp_one_vf(adev)) 5196 data = RREG32_NO_KIQ(reg); 5197 else 5198 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); 5199 5200 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; 5201 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5202 5203 if (amdgpu_sriov_is_pp_one_vf(adev)) 5204 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); 5205 else 5206 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); 5207 5208 amdgpu_gfx_off_ctrl(adev, true); 5209 } 5210 5211 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev, 5212 uint32_t offset, 5213 struct soc15_reg_rlcg *entries, int arr_size) 5214 { 5215 int i; 5216 uint32_t reg; 5217 5218 if (!entries) 5219 return false; 5220 5221 for (i = 0; i < arr_size; i++) { 5222 const struct soc15_reg_rlcg *entry; 5223 5224 entry = &entries[i]; 5225 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; 5226 if (offset == reg) 5227 return true; 5228 } 5229 5230 return false; 5231 } 5232 5233 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset) 5234 { 5235 return gfx_v9_0_check_rlcg_range(adev, offset, 5236 (void *)rlcg_access_gc_9_0, 5237 ARRAY_SIZE(rlcg_access_gc_9_0)); 5238 } 5239 5240 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 5241 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 5242 .set_safe_mode = gfx_v9_0_set_safe_mode, 5243 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 5244 .init = gfx_v9_0_rlc_init, 5245 .get_csb_size = gfx_v9_0_get_csb_size, 5246 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 5247 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 5248 .resume = gfx_v9_0_rlc_resume, 5249 .stop = gfx_v9_0_rlc_stop, 5250 .reset = gfx_v9_0_rlc_reset, 5251 .start = gfx_v9_0_rlc_start, 5252 .update_spm_vmid = gfx_v9_0_update_spm_vmid, 5253 .sriov_wreg = gfx_v9_0_sriov_wreg, 5254 .sriov_rreg = gfx_v9_0_sriov_rreg, 5255 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, 5256 }; 5257 5258 static int gfx_v9_0_set_powergating_state(void *handle, 5259 enum amd_powergating_state state) 5260 { 5261 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5262 bool enable = (state == AMD_PG_STATE_GATE); 5263 5264 switch (adev->ip_versions[GC_HWIP][0]) { 5265 case IP_VERSION(9, 2, 2): 5266 case IP_VERSION(9, 1, 0): 5267 case IP_VERSION(9, 3, 0): 5268 if (!enable) 5269 amdgpu_gfx_off_ctrl(adev, false); 5270 5271 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5272 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 5273 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 5274 } else { 5275 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 5276 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 5277 } 5278 5279 if (adev->pg_flags & 
AMD_PG_SUPPORT_CP) 5280 gfx_v9_0_enable_cp_power_gating(adev, true); 5281 else 5282 gfx_v9_0_enable_cp_power_gating(adev, false); 5283 5284 /* update gfx cgpg state */ 5285 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 5286 5287 /* update mgcg state */ 5288 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 5289 5290 if (enable) 5291 amdgpu_gfx_off_ctrl(adev, true); 5292 break; 5293 case IP_VERSION(9, 2, 1): 5294 amdgpu_gfx_off_ctrl(adev, enable); 5295 break; 5296 default: 5297 break; 5298 } 5299 5300 return 0; 5301 } 5302 5303 static int gfx_v9_0_set_clockgating_state(void *handle, 5304 enum amd_clockgating_state state) 5305 { 5306 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5307 5308 if (amdgpu_sriov_vf(adev)) 5309 return 0; 5310 5311 switch (adev->ip_versions[GC_HWIP][0]) { 5312 case IP_VERSION(9, 0, 1): 5313 case IP_VERSION(9, 2, 1): 5314 case IP_VERSION(9, 4, 0): 5315 case IP_VERSION(9, 2, 2): 5316 case IP_VERSION(9, 1, 0): 5317 case IP_VERSION(9, 4, 1): 5318 case IP_VERSION(9, 3, 0): 5319 case IP_VERSION(9, 4, 2): 5320 gfx_v9_0_update_gfx_clock_gating(adev, 5321 state == AMD_CG_STATE_GATE); 5322 break; 5323 default: 5324 break; 5325 } 5326 return 0; 5327 } 5328 5329 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 5330 { 5331 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5332 int data; 5333 5334 if (amdgpu_sriov_vf(adev)) 5335 *flags = 0; 5336 5337 /* AMD_CG_SUPPORT_GFX_MGCG */ 5338 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 5339 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5340 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5341 5342 /* AMD_CG_SUPPORT_GFX_CGCG */ 5343 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 5344 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5345 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5346 5347 /* AMD_CG_SUPPORT_GFX_CGLS */ 5348 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5349 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5350 5351 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5352 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 5353 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5354 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5355 5356 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5357 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 5358 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5359 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5360 5361 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) { 5362 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5363 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 5364 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5365 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5366 5367 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5368 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5369 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5370 } 5371 } 5372 5373 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5374 { 5375 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 5376 } 5377 5378 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5379 { 5380 struct amdgpu_device *adev = ring->adev; 5381 u64 wptr; 5382 5383 /* XXX check if swapping is necessary on BE */ 5384 if (ring->use_doorbell) { 5385 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 5386 } else { 5387 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 5388 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 5389 } 5390 5391 return wptr; 5392 } 5393 5394 static void gfx_v9_0_ring_set_wptr_gfx(struct 
amdgpu_ring *ring) 5395 { 5396 struct amdgpu_device *adev = ring->adev; 5397 5398 if (ring->use_doorbell) { 5399 /* XXX check if swapping is necessary on BE */ 5400 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5401 WDOORBELL64(ring->doorbell_index, ring->wptr); 5402 } else { 5403 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 5404 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 5405 } 5406 } 5407 5408 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5409 { 5410 struct amdgpu_device *adev = ring->adev; 5411 u32 ref_and_mask, reg_mem_engine; 5412 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5413 5414 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5415 switch (ring->me) { 5416 case 1: 5417 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5418 break; 5419 case 2: 5420 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5421 break; 5422 default: 5423 return; 5424 } 5425 reg_mem_engine = 0; 5426 } else { 5427 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 5428 reg_mem_engine = 1; /* pfp */ 5429 } 5430 5431 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5432 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5433 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5434 ref_and_mask, ref_and_mask, 0x20); 5435 } 5436 5437 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5438 struct amdgpu_job *job, 5439 struct amdgpu_ib *ib, 5440 uint32_t flags) 5441 { 5442 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5443 u32 header, control = 0; 5444 5445 if (ib->flags & AMDGPU_IB_FLAG_CE) 5446 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5447 else 5448 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5449 5450 control |= ib->length_dw | (vmid << 24); 5451 5452 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5453 control |= INDIRECT_BUFFER_PRE_ENB(1); 5454 5455 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) 5456 gfx_v9_0_ring_emit_de_meta(ring); 5457 } 5458 5459 amdgpu_ring_write(ring, header); 5460 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5461 amdgpu_ring_write(ring, 5462 #ifdef __BIG_ENDIAN 5463 (2 << 0) | 5464 #endif 5465 lower_32_bits(ib->gpu_addr)); 5466 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5467 amdgpu_ring_write(ring, control); 5468 } 5469 5470 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5471 struct amdgpu_job *job, 5472 struct amdgpu_ib *ib, 5473 uint32_t flags) 5474 { 5475 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5476 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5477 5478 /* Currently, there is a high possibility to get wave ID mismatch 5479 * between ME and GDS, leading to a hw deadlock, because ME generates 5480 * different wave IDs than the GDS expects. This situation happens 5481 * randomly when at least 5 compute pipes use GDS ordered append. 5482 * The wave IDs generated by ME are also wrong after suspend/resume. 5483 * Those are probably bugs somewhere else in the kernel driver. 5484 * 5485 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5486 * GDS to 0 for this ring (me/pipe). 
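* The reset below is only emitted when the IB sets AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID.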
5487 */ 5488 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5489 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5490 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 5491 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5492 } 5493 5494 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5495 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5496 amdgpu_ring_write(ring, 5497 #ifdef __BIG_ENDIAN 5498 (2 << 0) | 5499 #endif 5500 lower_32_bits(ib->gpu_addr)); 5501 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5502 amdgpu_ring_write(ring, control); 5503 } 5504 5505 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5506 u64 seq, unsigned flags) 5507 { 5508 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5509 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5510 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5511 5512 /* RELEASE_MEM - flush caches, send int */ 5513 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5514 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 5515 EOP_TC_NC_ACTION_EN) : 5516 (EOP_TCL1_ACTION_EN | 5517 EOP_TC_ACTION_EN | 5518 EOP_TC_WB_ACTION_EN | 5519 EOP_TC_MD_ACTION_EN)) | 5520 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5521 EVENT_INDEX(5))); 5522 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 5523 5524 /* 5525 * the address should be Qword aligned if 64bit write, Dword 5526 * aligned if only send 32bit data low (discard data high) 5527 */ 5528 if (write64bit) 5529 BUG_ON(addr & 0x7); 5530 else 5531 BUG_ON(addr & 0x3); 5532 amdgpu_ring_write(ring, lower_32_bits(addr)); 5533 amdgpu_ring_write(ring, upper_32_bits(addr)); 5534 amdgpu_ring_write(ring, lower_32_bits(seq)); 5535 amdgpu_ring_write(ring, upper_32_bits(seq)); 5536 amdgpu_ring_write(ring, 0); 5537 } 5538 5539 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5540 { 5541 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5542 uint32_t seq = ring->fence_drv.sync_seq; 5543 uint64_t addr = ring->fence_drv.gpu_addr; 5544 5545 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5546 lower_32_bits(addr), upper_32_bits(addr), 5547 seq, 0xffffffff, 4); 5548 } 5549 5550 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5551 unsigned vmid, uint64_t pd_addr) 5552 { 5553 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5554 5555 /* compute doesn't have PFP */ 5556 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5557 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5558 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5559 amdgpu_ring_write(ring, 0x0); 5560 } 5561 } 5562 5563 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5564 { 5565 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 5566 } 5567 5568 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5569 { 5570 u64 wptr; 5571 5572 /* XXX check if swapping is necessary on BE */ 5573 if (ring->use_doorbell) 5574 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 5575 else 5576 BUG(); 5577 return wptr; 5578 } 5579 5580 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5581 { 5582 struct amdgpu_device *adev = ring->adev; 5583 5584 /* XXX check if swapping is necessary on BE */ 5585 if (ring->use_doorbell) { 5586 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5587 WDOORBELL64(ring->doorbell_index, ring->wptr); 5588 } else{ 5589 BUG(); /* only DOORBELL 
method supported on gfx9 now */ 5590 } 5591 } 5592 5593 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5594 u64 seq, unsigned int flags) 5595 { 5596 struct amdgpu_device *adev = ring->adev; 5597 5598 /* we only allocate 32bit for each seq wb address */ 5599 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5600 5601 /* write fence seq to the "addr" */ 5602 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5603 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5604 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5605 amdgpu_ring_write(ring, lower_32_bits(addr)); 5606 amdgpu_ring_write(ring, upper_32_bits(addr)); 5607 amdgpu_ring_write(ring, lower_32_bits(seq)); 5608 5609 if (flags & AMDGPU_FENCE_FLAG_INT) { 5610 /* set register to trigger INT */ 5611 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5612 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5613 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5614 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5615 amdgpu_ring_write(ring, 0); 5616 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5617 } 5618 } 5619 5620 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5621 { 5622 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5623 amdgpu_ring_write(ring, 0); 5624 } 5625 5626 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 5627 { 5628 struct v9_ce_ib_state ce_payload = {0}; 5629 uint64_t csa_addr; 5630 int cnt; 5631 5632 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5633 csa_addr = amdgpu_csa_vaddr(ring->adev); 5634 5635 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5636 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5637 WRITE_DATA_DST_SEL(8) | 5638 WR_CONFIRM) | 5639 WRITE_DATA_CACHE_POLICY(0)); 5640 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5641 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5642 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 5643 } 5644 5645 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 5646 { 5647 struct v9_de_ib_state de_payload = {0}; 5648 uint64_t csa_addr, gds_addr; 5649 int cnt; 5650 5651 csa_addr = amdgpu_csa_vaddr(ring->adev); 5652 gds_addr = csa_addr + 4096; 5653 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 5654 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5655 5656 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5657 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5658 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5659 WRITE_DATA_DST_SEL(8) | 5660 WR_CONFIRM) | 5661 WRITE_DATA_CACHE_POLICY(0)); 5662 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5663 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5664 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 5665 } 5666 5667 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 5668 bool secure) 5669 { 5670 uint32_t v = secure ? FRAME_TMZ : 0; 5671 5672 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5673 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 
0 : 1)); 5674 } 5675 5676 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5677 { 5678 uint32_t dw2 = 0; 5679 5680 if (amdgpu_sriov_vf(ring->adev)) 5681 gfx_v9_0_ring_emit_ce_meta(ring); 5682 5683 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5684 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5685 /* set load_global_config & load_global_uconfig */ 5686 dw2 |= 0x8001; 5687 /* set load_cs_sh_regs */ 5688 dw2 |= 0x01000000; 5689 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5690 dw2 |= 0x10002; 5691 5692 /* set load_ce_ram if preamble presented */ 5693 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5694 dw2 |= 0x10000000; 5695 } else { 5696 /* still load_ce_ram if this is the first time preamble presented 5697 * although there is no context switch happens. 5698 */ 5699 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5700 dw2 |= 0x10000000; 5701 } 5702 5703 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5704 amdgpu_ring_write(ring, dw2); 5705 amdgpu_ring_write(ring, 0); 5706 } 5707 5708 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 5709 { 5710 unsigned ret; 5711 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5712 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 5713 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 5714 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 5715 ret = ring->wptr & ring->buf_mask; 5716 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 5717 return ret; 5718 } 5719 5720 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 5721 { 5722 unsigned cur; 5723 BUG_ON(offset > ring->buf_mask); 5724 BUG_ON(ring->ring[offset] != 0x55aa55aa); 5725 5726 cur = (ring->wptr & ring->buf_mask) - 1; 5727 if (likely(cur > offset)) 5728 ring->ring[offset] = cur - offset; 5729 else 5730 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 5731 } 5732 5733 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 5734 uint32_t reg_val_offs) 5735 { 5736 struct amdgpu_device *adev = ring->adev; 5737 5738 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5739 amdgpu_ring_write(ring, 0 | /* src: register*/ 5740 (5 << 8) | /* dst: memory */ 5741 (1 << 20)); /* write confirm */ 5742 amdgpu_ring_write(ring, reg); 5743 amdgpu_ring_write(ring, 0); 5744 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5745 reg_val_offs * 4)); 5746 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5747 reg_val_offs * 4)); 5748 } 5749 5750 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5751 uint32_t val) 5752 { 5753 uint32_t cmd = 0; 5754 5755 switch (ring->funcs->type) { 5756 case AMDGPU_RING_TYPE_GFX: 5757 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5758 break; 5759 case AMDGPU_RING_TYPE_KIQ: 5760 cmd = (1 << 16); /* no inc addr */ 5761 break; 5762 default: 5763 cmd = WR_CONFIRM; 5764 break; 5765 } 5766 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5767 amdgpu_ring_write(ring, cmd); 5768 amdgpu_ring_write(ring, reg); 5769 amdgpu_ring_write(ring, 0); 5770 amdgpu_ring_write(ring, val); 5771 } 5772 5773 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5774 uint32_t val, uint32_t mask) 5775 { 5776 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5777 } 5778 5779 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5780 uint32_t reg0, uint32_t 
reg1, 5781 uint32_t ref, uint32_t mask) 5782 { 5783 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5784 struct amdgpu_device *adev = ring->adev; 5785 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 5786 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5787 5788 if (fw_version_ok) 5789 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5790 ref, mask, 0x20); 5791 else 5792 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5793 ref, mask); 5794 } 5795 5796 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5797 { 5798 struct amdgpu_device *adev = ring->adev; 5799 uint32_t value = 0; 5800 5801 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5802 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5803 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5804 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5805 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5806 } 5807 5808 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5809 enum amdgpu_interrupt_state state) 5810 { 5811 switch (state) { 5812 case AMDGPU_IRQ_STATE_DISABLE: 5813 case AMDGPU_IRQ_STATE_ENABLE: 5814 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5815 TIME_STAMP_INT_ENABLE, 5816 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5817 break; 5818 default: 5819 break; 5820 } 5821 } 5822 5823 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5824 int me, int pipe, 5825 enum amdgpu_interrupt_state state) 5826 { 5827 u32 mec_int_cntl, mec_int_cntl_reg; 5828 5829 /* 5830 * amdgpu controls only the first MEC. That's why this function only 5831 * handles the setting of interrupts for this specific MEC. All other 5832 * pipes' interrupts are set by amdkfd. 5833 */ 5834 5835 if (me == 1) { 5836 switch (pipe) { 5837 case 0: 5838 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5839 break; 5840 case 1: 5841 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5842 break; 5843 case 2: 5844 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5845 break; 5846 case 3: 5847 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5848 break; 5849 default: 5850 DRM_DEBUG("invalid pipe %d\n", pipe); 5851 return; 5852 } 5853 } else { 5854 DRM_DEBUG("invalid me %d\n", me); 5855 return; 5856 } 5857 5858 switch (state) { 5859 case AMDGPU_IRQ_STATE_DISABLE: 5860 mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg); 5861 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5862 TIME_STAMP_INT_ENABLE, 0); 5863 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 5864 break; 5865 case AMDGPU_IRQ_STATE_ENABLE: 5866 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 5867 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5868 TIME_STAMP_INT_ENABLE, 1); 5869 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 5870 break; 5871 default: 5872 break; 5873 } 5874 } 5875 5876 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5877 struct amdgpu_irq_src *source, 5878 unsigned type, 5879 enum amdgpu_interrupt_state state) 5880 { 5881 switch (state) { 5882 case AMDGPU_IRQ_STATE_DISABLE: 5883 case AMDGPU_IRQ_STATE_ENABLE: 5884 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5885 PRIV_REG_INT_ENABLE, 5886 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 5887 break; 5888 default: 5889 break; 5890 } 5891 5892 return 0; 5893 } 5894 5895 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5896 struct amdgpu_irq_src *source, 5897 unsigned type, 5898 enum amdgpu_interrupt_state state) 5899 { 5900 switch (state) { 5901 case AMDGPU_IRQ_STATE_DISABLE: 5902 case AMDGPU_IRQ_STATE_ENABLE: 5903 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5904 PRIV_INSTR_INT_ENABLE, 5905 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5906 break; 5907 default: 5908 break; 5909 } 5910 5911 return 0; 5912 } 5913 5914 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 5915 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5916 CP_ECC_ERROR_INT_ENABLE, 1) 5917 5918 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 5919 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5920 CP_ECC_ERROR_INT_ENABLE, 0) 5921 5922 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 5923 struct amdgpu_irq_src *source, 5924 unsigned type, 5925 enum amdgpu_interrupt_state state) 5926 { 5927 switch (state) { 5928 case AMDGPU_IRQ_STATE_DISABLE: 5929 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5930 CP_ECC_ERROR_INT_ENABLE, 0); 5931 DISABLE_ECC_ON_ME_PIPE(1, 0); 5932 DISABLE_ECC_ON_ME_PIPE(1, 1); 5933 DISABLE_ECC_ON_ME_PIPE(1, 2); 5934 DISABLE_ECC_ON_ME_PIPE(1, 3); 5935 break; 5936 5937 case AMDGPU_IRQ_STATE_ENABLE: 5938 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5939 CP_ECC_ERROR_INT_ENABLE, 1); 5940 ENABLE_ECC_ON_ME_PIPE(1, 0); 5941 ENABLE_ECC_ON_ME_PIPE(1, 1); 5942 ENABLE_ECC_ON_ME_PIPE(1, 2); 5943 ENABLE_ECC_ON_ME_PIPE(1, 3); 5944 break; 5945 default: 5946 break; 5947 } 5948 5949 return 0; 5950 } 5951 5952 5953 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5954 struct amdgpu_irq_src *src, 5955 unsigned type, 5956 enum amdgpu_interrupt_state state) 5957 { 5958 switch (type) { 5959 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5960 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5961 break; 5962 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5963 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5964 break; 5965 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5966 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5967 break; 5968 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5969 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5970 break; 5971 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5972 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5973 break; 5974 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5975 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5976 break; 5977 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5978 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5979 break; 5980 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5981 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5982 break; 5983 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5984 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5985 break; 5986 default: 5987 break; 5988 } 5989 return 0; 5990 } 5991 5992 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5993 struct amdgpu_irq_src *source, 5994 struct amdgpu_iv_entry *entry) 5995 { 5996 int i; 5997 u8 me_id, pipe_id, queue_id; 5998 struct amdgpu_ring *ring; 5999 6000 DRM_DEBUG("IH: CP EOP\n"); 6001 me_id = (entry->ring_id & 0x0c) >> 2; 6002 pipe_id = (entry->ring_id & 0x03) >> 0; 6003 queue_id = (entry->ring_id & 0x70) >> 4; 6004 6005 switch (me_id) { 6006 case 0: 6007 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6008 break; 6009 case 1: 6010 case 
2: 6011 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6012 ring = &adev->gfx.compute_ring[i]; 6013 /* Per-queue interrupt is supported for MEC starting from VI. 6014 * The interrupt can only be enabled/disabled per pipe instead of per queue. 6015 */ 6016 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6017 amdgpu_fence_process(ring); 6018 } 6019 break; 6020 } 6021 return 0; 6022 } 6023 6024 static void gfx_v9_0_fault(struct amdgpu_device *adev, 6025 struct amdgpu_iv_entry *entry) 6026 { 6027 u8 me_id, pipe_id, queue_id; 6028 struct amdgpu_ring *ring; 6029 int i; 6030 6031 me_id = (entry->ring_id & 0x0c) >> 2; 6032 pipe_id = (entry->ring_id & 0x03) >> 0; 6033 queue_id = (entry->ring_id & 0x70) >> 4; 6034 6035 switch (me_id) { 6036 case 0: 6037 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 6038 break; 6039 case 1: 6040 case 2: 6041 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6042 ring = &adev->gfx.compute_ring[i]; 6043 if (ring->me == me_id && ring->pipe == pipe_id && 6044 ring->queue == queue_id) 6045 drm_sched_fault(&ring->sched); 6046 } 6047 break; 6048 } 6049 } 6050 6051 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 6052 struct amdgpu_irq_src *source, 6053 struct amdgpu_iv_entry *entry) 6054 { 6055 DRM_ERROR("Illegal register access in command stream\n"); 6056 gfx_v9_0_fault(adev, entry); 6057 return 0; 6058 } 6059 6060 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 6061 struct amdgpu_irq_src *source, 6062 struct amdgpu_iv_entry *entry) 6063 { 6064 DRM_ERROR("Illegal instruction in command stream\n"); 6065 gfx_v9_0_fault(adev, entry); 6066 return 0; 6067 } 6068 6069 6070 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { 6071 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 6072 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 6073 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) 6074 }, 6075 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 6076 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), 6077 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) 6078 }, 6079 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6080 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), 6081 0, 0 6082 }, 6083 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6084 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), 6085 0, 0 6086 }, 6087 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 6088 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), 6089 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) 6090 }, 6091 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6092 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), 6093 0, 0 6094 }, 6095 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6096 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 6097 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) 6098 }, 6099 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 6100 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), 6101 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) 6102 }, 6103 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 6104 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), 6105 0, 0 6106 }, 6107 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 6108 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), 6109 0, 0 6110 }, 6111 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 6112 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), 6113 0, 0 6114 }, 6115 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6116 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), 6117 
SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) 6118 }, 6119 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6120 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 6121 0, 0 6122 }, 6123 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6124 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 6125 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) 6126 }, 6127 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 6128 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6129 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 6130 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) 6131 }, 6132 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 6133 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6134 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 6135 0, 0 6136 }, 6137 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 6138 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6139 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 6140 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) 6141 }, 6142 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 6143 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6144 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 6145 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) 6146 }, 6147 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 6148 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6149 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 6150 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) 6151 }, 6152 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 6153 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6154 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 6155 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) 6156 }, 6157 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 6158 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 6159 0, 0 6160 }, 6161 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6162 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 6163 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) 6164 }, 6165 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6166 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 6167 0, 0 6168 }, 6169 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6170 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 6171 0, 0 6172 }, 6173 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6174 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 6175 0, 0 6176 }, 6177 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6178 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 6179 0, 0 6180 }, 6181 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6182 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 6183 0, 0 6184 }, 6185 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6186 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 6187 0, 0 6188 }, 6189 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6190 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 6191 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) 6192 }, 6193 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6194 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 6195 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) 6196 }, 6197 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6198 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 6199 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) 6200 }, 6201 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6202 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 6203 
SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) 6204 }, 6205 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6206 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 6207 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) 6208 }, 6209 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6210 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 6211 0, 0 6212 }, 6213 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6214 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 6215 0, 0 6216 }, 6217 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6218 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 6219 0, 0 6220 }, 6221 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6222 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 6223 0, 0 6224 }, 6225 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6226 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 6227 0, 0 6228 }, 6229 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6230 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 6231 0, 0 6232 }, 6233 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6234 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 6235 0, 0 6236 }, 6237 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6238 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 6239 0, 0 6240 }, 6241 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6242 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 6243 0, 0 6244 }, 6245 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6246 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 6247 0, 0 6248 }, 6249 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6250 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 6251 0, 0 6252 }, 6253 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6254 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 6255 0, 0 6256 }, 6257 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6258 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 6259 0, 0 6260 }, 6261 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 6262 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 6263 0, 0 6264 }, 6265 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6266 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 6267 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) 6268 }, 6269 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6270 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 6271 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) 6272 }, 6273 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6274 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 6275 0, 0 6276 }, 6277 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6278 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 6279 0, 0 6280 }, 6281 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6282 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 6283 0, 0 6284 }, 6285 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6286 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 6287 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) 6288 }, 6289 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6290 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 6291 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) 
6292 }, 6293 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6294 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 6295 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) 6296 }, 6297 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6298 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 6299 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) 6300 }, 6301 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6302 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), 6303 0, 0 6304 }, 6305 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6306 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), 6307 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) 6308 }, 6309 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6310 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), 6311 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) 6312 }, 6313 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6314 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), 6315 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) 6316 }, 6317 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6318 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), 6319 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) 6320 }, 6321 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6322 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), 6323 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) 6324 }, 6325 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6326 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), 6327 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) 6328 }, 6329 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6330 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), 6331 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) 6332 }, 6333 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6334 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 6335 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) 6336 }, 6337 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6338 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 6339 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) 6340 }, 6341 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6342 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 6343 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) 6344 }, 6345 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6346 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 6347 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) 6348 }, 6349 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6350 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 6351 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) 6352 }, 6353 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6354 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 6355 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) 6356 }, 6357 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6358 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 6359 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) 6360 }, 6361 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6362 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 6363 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) 6364 }, 6365 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6366 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 6367 
SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) 6368 }, 6369 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6370 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 6371 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) 6372 }, 6373 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6374 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 6375 0, 0 6376 }, 6377 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6378 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 6379 0, 0 6380 }, 6381 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6382 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 6383 0, 0 6384 }, 6385 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6386 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 6387 0, 0 6388 }, 6389 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6390 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 6391 0, 0 6392 }, 6393 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6394 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 6395 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) 6396 }, 6397 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6398 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 6399 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) 6400 }, 6401 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6402 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 6403 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) 6404 }, 6405 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6406 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 6407 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) 6408 }, 6409 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6410 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 6411 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) 6412 }, 6413 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6414 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 6415 0, 0 6416 }, 6417 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6418 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 6419 0, 0 6420 }, 6421 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6422 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 6423 0, 0 6424 }, 6425 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6426 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 6427 0, 0 6428 }, 6429 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6430 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 6431 0, 0 6432 }, 6433 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6434 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 6435 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) 6436 }, 6437 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6438 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 6439 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) 6440 }, 6441 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6442 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 6443 SOC15_REG_FIELD(GCEA_EDC_CNT, 
DRAMWR_DATAMEM_DED_COUNT) 6444 }, 6445 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6446 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 6447 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) 6448 }, 6449 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6450 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 6451 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) 6452 }, 6453 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6454 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 6455 0, 0 6456 }, 6457 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6458 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 6459 0, 0 6460 }, 6461 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6462 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 6463 0, 0 6464 }, 6465 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6466 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 6467 0, 0 6468 }, 6469 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6470 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 6471 0, 0 6472 }, 6473 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6474 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 6475 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) 6476 }, 6477 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6478 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 6479 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) 6480 }, 6481 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6482 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 6483 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) 6484 }, 6485 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6486 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 6487 0, 0 6488 }, 6489 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6490 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 6491 0, 0 6492 }, 6493 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6494 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 6495 0, 0 6496 }, 6497 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6498 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 6499 0, 0 6500 }, 6501 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6502 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 6503 0, 0 6504 }, 6505 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6506 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 6507 0, 0 6508 } 6509 }; 6510 6511 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 6512 void *inject_if) 6513 { 6514 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 6515 int ret; 6516 struct ta_ras_trigger_error_input block_info = { 0 }; 6517 6518 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6519 return -EINVAL; 6520 6521 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 6522 return -EINVAL; 6523 6524 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 6525 return -EPERM; 6526 6527 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 6528 info->head.type)) { 6529 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", 6530 ras_gfx_subblocks[info->head.sub_block_index].name, 6531 info->head.type); 6532 return -EPERM; 6533 } 6534 6535 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 6536 info->head.type)) { 6537 DRM_ERROR("GFX Subblock %s, driver do not support type 
0x%x\n", 6538 ras_gfx_subblocks[info->head.sub_block_index].name, 6539 info->head.type); 6540 return -EPERM; 6541 } 6542 6543 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 6544 block_info.sub_block_index = 6545 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 6546 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 6547 block_info.address = info->address; 6548 block_info.value = info->value; 6549 6550 mutex_lock(&adev->grbm_idx_mutex); 6551 ret = psp_ras_trigger_error(&adev->psp, &block_info); 6552 mutex_unlock(&adev->grbm_idx_mutex); 6553 6554 return ret; 6555 } 6556 6557 static const char *vml2_mems[] = { 6558 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", 6559 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", 6560 "UTC_VML2_BANK_CACHE_0_4K_MEM0", 6561 "UTC_VML2_BANK_CACHE_0_4K_MEM1", 6562 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", 6563 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", 6564 "UTC_VML2_BANK_CACHE_1_4K_MEM0", 6565 "UTC_VML2_BANK_CACHE_1_4K_MEM1", 6566 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", 6567 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", 6568 "UTC_VML2_BANK_CACHE_2_4K_MEM0", 6569 "UTC_VML2_BANK_CACHE_2_4K_MEM1", 6570 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", 6571 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", 6572 "UTC_VML2_BANK_CACHE_3_4K_MEM0", 6573 "UTC_VML2_BANK_CACHE_3_4K_MEM1", 6574 }; 6575 6576 static const char *vml2_walker_mems[] = { 6577 "UTC_VML2_CACHE_PDE0_MEM0", 6578 "UTC_VML2_CACHE_PDE0_MEM1", 6579 "UTC_VML2_CACHE_PDE1_MEM0", 6580 "UTC_VML2_CACHE_PDE1_MEM1", 6581 "UTC_VML2_CACHE_PDE2_MEM0", 6582 "UTC_VML2_CACHE_PDE2_MEM1", 6583 "UTC_VML2_RDIF_LOG_FIFO", 6584 }; 6585 6586 static const char *atc_l2_cache_2m_mems[] = { 6587 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", 6588 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", 6589 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", 6590 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", 6591 }; 6592 6593 static const char *atc_l2_cache_4k_mems[] = { 6594 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", 6595 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", 6596 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", 6597 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", 6598 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", 6599 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", 6600 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", 6601 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", 6602 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", 6603 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", 6604 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", 6605 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", 6606 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", 6607 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", 6608 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", 6609 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", 6610 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", 6611 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", 6612 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", 6613 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", 6614 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", 6615 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", 6616 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", 6617 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", 6618 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", 6619 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", 6620 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", 6621 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", 6622 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", 6623 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", 6624 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", 6625 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", 6626 }; 6627 6628 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, 6629 struct ras_err_data *err_data) 6630 { 6631 uint32_t i, data; 6632 uint32_t sec_count, ded_count; 6633 6634 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6635 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6636 
WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6637 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6638 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6639 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6640 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6641 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6642 6643 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6644 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6645 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6646 6647 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); 6648 if (sec_count) { 6649 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6650 "SEC %d\n", i, vml2_mems[i], sec_count); 6651 err_data->ce_count += sec_count; 6652 } 6653 6654 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); 6655 if (ded_count) { 6656 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6657 "DED %d\n", i, vml2_mems[i], ded_count); 6658 err_data->ue_count += ded_count; 6659 } 6660 } 6661 6662 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6663 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6664 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6665 6666 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6667 SEC_COUNT); 6668 if (sec_count) { 6669 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6670 "SEC %d\n", i, vml2_walker_mems[i], sec_count); 6671 err_data->ce_count += sec_count; 6672 } 6673 6674 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6675 DED_COUNT); 6676 if (ded_count) { 6677 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6678 "DED %d\n", i, vml2_walker_mems[i], ded_count); 6679 err_data->ue_count += ded_count; 6680 } 6681 } 6682 6683 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6684 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6685 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6686 6687 sec_count = (data & 0x00006000L) >> 0xd; 6688 if (sec_count) { 6689 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6690 "SEC %d\n", i, atc_l2_cache_2m_mems[i], 6691 sec_count); 6692 err_data->ce_count += sec_count; 6693 } 6694 } 6695 6696 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6697 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6698 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6699 6700 sec_count = (data & 0x00006000L) >> 0xd; 6701 if (sec_count) { 6702 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6703 "SEC %d\n", i, atc_l2_cache_4k_mems[i], 6704 sec_count); 6705 err_data->ce_count += sec_count; 6706 } 6707 6708 ded_count = (data & 0x00018000L) >> 0xf; 6709 if (ded_count) { 6710 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6711 "DED %d\n", i, atc_l2_cache_4k_mems[i], 6712 ded_count); 6713 err_data->ue_count += ded_count; 6714 } 6715 } 6716 6717 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6718 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6719 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6720 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6721 6722 return 0; 6723 } 6724 6725 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev, 6726 const struct soc15_reg_entry *reg, 6727 uint32_t se_id, uint32_t inst_id, uint32_t value, 6728 uint32_t *sec_count, uint32_t *ded_count) 6729 { 6730 uint32_t i; 6731 uint32_t sec_cnt, ded_cnt; 6732 6733 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) { 6734 if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset || 6735 gfx_v9_0_ras_fields[i].seg != reg->seg || 6736 gfx_v9_0_ras_fields[i].inst != 
reg->inst) 6737 continue; 6738 6739 sec_cnt = (value & 6740 gfx_v9_0_ras_fields[i].sec_count_mask) >> 6741 gfx_v9_0_ras_fields[i].sec_count_shift; 6742 if (sec_cnt) { 6743 dev_info(adev->dev, "GFX SubBlock %s, " 6744 "Instance[%d][%d], SEC %d\n", 6745 gfx_v9_0_ras_fields[i].name, 6746 se_id, inst_id, 6747 sec_cnt); 6748 *sec_count += sec_cnt; 6749 } 6750 6751 ded_cnt = (value & 6752 gfx_v9_0_ras_fields[i].ded_count_mask) >> 6753 gfx_v9_0_ras_fields[i].ded_count_shift; 6754 if (ded_cnt) { 6755 dev_info(adev->dev, "GFX SubBlock %s, " 6756 "Instance[%d][%d], DED %d\n", 6757 gfx_v9_0_ras_fields[i].name, 6758 se_id, inst_id, 6759 ded_cnt); 6760 *ded_count += ded_cnt; 6761 } 6762 } 6763 6764 return 0; 6765 } 6766 6767 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) 6768 { 6769 int i, j, k; 6770 6771 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6772 return; 6773 6774 /* read back registers to clear the counters */ 6775 mutex_lock(&adev->grbm_idx_mutex); 6776 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 6777 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 6778 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 6779 gfx_v9_0_select_se_sh(adev, j, 0x0, k); 6780 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 6781 } 6782 } 6783 } 6784 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 6785 mutex_unlock(&adev->grbm_idx_mutex); 6786 6787 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6788 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6789 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6790 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6791 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6792 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6793 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6794 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6795 6796 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6797 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6798 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6799 } 6800 6801 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6802 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6803 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6804 } 6805 6806 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6807 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6808 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6809 } 6810 6811 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6812 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6813 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6814 } 6815 6816 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6817 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6818 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6819 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6820 } 6821 6822 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 6823 void *ras_error_status) 6824 { 6825 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 6826 uint32_t sec_count = 0, ded_count = 0; 6827 uint32_t i, j, k; 6828 uint32_t reg_value; 6829 6830 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6831 return -EINVAL; 6832 6833 err_data->ue_count = 0; 6834 err_data->ce_count = 0; 6835 6836 mutex_lock(&adev->grbm_idx_mutex); 6837 6838 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 6839 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 6840 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 6841 
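				/* Steer GRBM to SE j, instance k, sample this EDC
				 * counter, and let gfx_v9_0_ras_error_count() decode
				 * it against the gfx_v9_0_ras_fields[] table when the
				 * value is non-zero.
				 */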
gfx_v9_0_select_se_sh(adev, j, 0, k); 6842 reg_value = 6843 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 6844 if (reg_value) 6845 gfx_v9_0_ras_error_count(adev, 6846 &gfx_v9_0_edc_counter_regs[i], 6847 j, k, reg_value, 6848 &sec_count, &ded_count); 6849 } 6850 } 6851 } 6852 6853 err_data->ce_count += sec_count; 6854 err_data->ue_count += ded_count; 6855 6856 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 6857 mutex_unlock(&adev->grbm_idx_mutex); 6858 6859 gfx_v9_0_query_utc_edc_status(adev, err_data); 6860 6861 return 0; 6862 } 6863 6864 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring) 6865 { 6866 const unsigned int cp_coher_cntl = 6867 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) | 6868 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) | 6869 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) | 6870 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) | 6871 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1); 6872 6873 /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */ 6874 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); 6875 amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */ 6876 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6877 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 6878 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6879 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6880 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 6881 } 6882 6883 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring, 6884 uint32_t pipe, bool enable) 6885 { 6886 struct amdgpu_device *adev = ring->adev; 6887 uint32_t val; 6888 uint32_t wcl_cs_reg; 6889 6890 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */ 6891 val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT; 6892 6893 switch (pipe) { 6894 case 0: 6895 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0); 6896 break; 6897 case 1: 6898 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1); 6899 break; 6900 case 2: 6901 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2); 6902 break; 6903 case 3: 6904 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3); 6905 break; 6906 default: 6907 DRM_DEBUG("invalid pipe %d\n", pipe); 6908 return; 6909 } 6910 6911 amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val); 6912 6913 } 6914 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) 6915 { 6916 struct amdgpu_device *adev = ring->adev; 6917 uint32_t val; 6918 int i; 6919 6920 6921 /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit 6922 * number of gfx waves. Setting 5 bit will make sure gfx only gets 6923 * around 25% of gpu resources. 6924 */ 6925 val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT; 6926 amdgpu_ring_emit_wreg(ring, 6927 SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX), 6928 val); 6929 6930 /* Restrict waves for normal/low priority compute queues as well 6931 * to get best QoS for high priority compute jobs. 6932 * 6933 * amdgpu controls only 1st ME(0-3 CS pipes). 
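	 * (i.e. MEC1 pipes 0-3; the loop below skips the pipe this ring itself
	 * is scheduled on and limits the remaining ones.)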
6934 */ 6935 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 6936 if (i != ring->pipe) 6937 gfx_v9_0_emit_wave_limit_cs(ring, i, enable); 6938 6939 } 6940 } 6941 6942 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 6943 .name = "gfx_v9_0", 6944 .early_init = gfx_v9_0_early_init, 6945 .late_init = gfx_v9_0_late_init, 6946 .sw_init = gfx_v9_0_sw_init, 6947 .sw_fini = gfx_v9_0_sw_fini, 6948 .hw_init = gfx_v9_0_hw_init, 6949 .hw_fini = gfx_v9_0_hw_fini, 6950 .suspend = gfx_v9_0_suspend, 6951 .resume = gfx_v9_0_resume, 6952 .is_idle = gfx_v9_0_is_idle, 6953 .wait_for_idle = gfx_v9_0_wait_for_idle, 6954 .soft_reset = gfx_v9_0_soft_reset, 6955 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 6956 .set_powergating_state = gfx_v9_0_set_powergating_state, 6957 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 6958 }; 6959 6960 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 6961 .type = AMDGPU_RING_TYPE_GFX, 6962 .align_mask = 0xff, 6963 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6964 .support_64bit_ptrs = true, 6965 .vmhub = AMDGPU_GFXHUB_0, 6966 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 6967 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 6968 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 6969 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 6970 5 + /* COND_EXEC */ 6971 7 + /* PIPELINE_SYNC */ 6972 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6973 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6974 2 + /* VM_FLUSH */ 6975 8 + /* FENCE for VM_FLUSH */ 6976 20 + /* GDS switch */ 6977 4 + /* double SWITCH_BUFFER, 6978 the first COND_EXEC jump to the place just 6979 prior to this double SWITCH_BUFFER */ 6980 5 + /* COND_EXEC */ 6981 7 + /* HDP_flush */ 6982 4 + /* VGT_flush */ 6983 14 + /* CE_META */ 6984 31 + /* DE_META */ 6985 3 + /* CNTX_CTRL */ 6986 5 + /* HDP_INVL */ 6987 8 + 8 + /* FENCE x2 */ 6988 2 + /* SWITCH_BUFFER */ 6989 7, /* gfx_v9_0_emit_mem_sync */ 6990 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 6991 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 6992 .emit_fence = gfx_v9_0_ring_emit_fence, 6993 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6994 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6995 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6996 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6997 .test_ring = gfx_v9_0_ring_test_ring, 6998 .test_ib = gfx_v9_0_ring_test_ib, 6999 .insert_nop = amdgpu_ring_insert_nop, 7000 .pad_ib = amdgpu_ring_generic_pad_ib, 7001 .emit_switch_buffer = gfx_v9_ring_emit_sb, 7002 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 7003 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 7004 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 7005 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 7006 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7007 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7008 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7009 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7010 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7011 }; 7012 7013 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 7014 .type = AMDGPU_RING_TYPE_COMPUTE, 7015 .align_mask = 0xff, 7016 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7017 .support_64bit_ptrs = true, 7018 .vmhub = AMDGPU_GFXHUB_0, 7019 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 7020 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 7021 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 7022 .emit_frame_size = 7023 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7024 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 7025 5 + /* hdp invalidate */ 7026 7 + /* 
gfx_v9_0_ring_emit_pipeline_sync */ 7027 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7028 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7029 2 + /* gfx_v9_0_ring_emit_vm_flush */ 7030 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7031 7 + /* gfx_v9_0_emit_mem_sync */ 7032 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ 7033 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ 7034 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 7035 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 7036 .emit_fence = gfx_v9_0_ring_emit_fence, 7037 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7038 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7039 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7040 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7041 .test_ring = gfx_v9_0_ring_test_ring, 7042 .test_ib = gfx_v9_0_ring_test_ib, 7043 .insert_nop = amdgpu_ring_insert_nop, 7044 .pad_ib = amdgpu_ring_generic_pad_ib, 7045 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7046 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7047 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7048 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7049 .emit_wave_limit = gfx_v9_0_emit_wave_limit, 7050 }; 7051 7052 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 7053 .type = AMDGPU_RING_TYPE_KIQ, 7054 .align_mask = 0xff, 7055 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7056 .support_64bit_ptrs = true, 7057 .vmhub = AMDGPU_GFXHUB_0, 7058 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 7059 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 7060 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 7061 .emit_frame_size = 7062 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7063 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 7064 5 + /* hdp invalidate */ 7065 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 7066 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7067 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7068 2 + /* gfx_v9_0_ring_emit_vm_flush */ 7069 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7070 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 7071 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 7072 .test_ring = gfx_v9_0_ring_test_ring, 7073 .insert_nop = amdgpu_ring_insert_nop, 7074 .pad_ib = amdgpu_ring_generic_pad_ib, 7075 .emit_rreg = gfx_v9_0_ring_emit_rreg, 7076 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7077 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7078 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7079 }; 7080 7081 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 7082 { 7083 int i; 7084 7085 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq; 7086 7087 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7088 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; 7089 7090 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7091 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; 7092 } 7093 7094 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { 7095 .set = gfx_v9_0_set_eop_interrupt_state, 7096 .process = gfx_v9_0_eop_irq, 7097 }; 7098 7099 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { 7100 .set = gfx_v9_0_set_priv_reg_fault_state, 7101 .process = gfx_v9_0_priv_reg_irq, 7102 }; 7103 7104 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { 7105 .set = gfx_v9_0_set_priv_inst_fault_state, 7106 .process = gfx_v9_0_priv_inst_irq, 7107 }; 7108 7109 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { 7110 .set = gfx_v9_0_set_cp_ecc_error_state, 7111 
.process = amdgpu_gfx_cp_ecc_error_irq, 7112 }; 7113 7114 7115 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) 7116 { 7117 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7118 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs; 7119 7120 adev->gfx.priv_reg_irq.num_types = 1; 7121 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; 7122 7123 adev->gfx.priv_inst_irq.num_types = 1; 7124 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; 7125 7126 adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/ 7127 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs; 7128 } 7129 7130 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) 7131 { 7132 switch (adev->ip_versions[GC_HWIP][0]) { 7133 case IP_VERSION(9, 0, 1): 7134 case IP_VERSION(9, 2, 1): 7135 case IP_VERSION(9, 4, 0): 7136 case IP_VERSION(9, 2, 2): 7137 case IP_VERSION(9, 1, 0): 7138 case IP_VERSION(9, 4, 1): 7139 case IP_VERSION(9, 3, 0): 7140 case IP_VERSION(9, 4, 2): 7141 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; 7142 break; 7143 default: 7144 break; 7145 } 7146 } 7147 7148 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) 7149 { 7150 /* init asci gds info */ 7151 switch (adev->ip_versions[GC_HWIP][0]) { 7152 case IP_VERSION(9, 0, 1): 7153 case IP_VERSION(9, 2, 1): 7154 case IP_VERSION(9, 4, 0): 7155 adev->gds.gds_size = 0x10000; 7156 break; 7157 case IP_VERSION(9, 2, 2): 7158 case IP_VERSION(9, 1, 0): 7159 case IP_VERSION(9, 4, 1): 7160 adev->gds.gds_size = 0x1000; 7161 break; 7162 case IP_VERSION(9, 4, 2): 7163 /* aldebaran removed all the GDS internal memory, 7164 * only support GWS opcode in kernel, like barrier 7165 * semaphore.etc */ 7166 adev->gds.gds_size = 0; 7167 break; 7168 default: 7169 adev->gds.gds_size = 0x10000; 7170 break; 7171 } 7172 7173 switch (adev->ip_versions[GC_HWIP][0]) { 7174 case IP_VERSION(9, 0, 1): 7175 case IP_VERSION(9, 4, 0): 7176 adev->gds.gds_compute_max_wave_id = 0x7ff; 7177 break; 7178 case IP_VERSION(9, 2, 1): 7179 adev->gds.gds_compute_max_wave_id = 0x27f; 7180 break; 7181 case IP_VERSION(9, 2, 2): 7182 case IP_VERSION(9, 1, 0): 7183 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 7184 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */ 7185 else 7186 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ 7187 break; 7188 case IP_VERSION(9, 4, 1): 7189 adev->gds.gds_compute_max_wave_id = 0xfff; 7190 break; 7191 case IP_VERSION(9, 4, 2): 7192 /* deprecated for Aldebaran, no usage at all */ 7193 adev->gds.gds_compute_max_wave_id = 0; 7194 break; 7195 default: 7196 /* this really depends on the chip */ 7197 adev->gds.gds_compute_max_wave_id = 0x7ff; 7198 break; 7199 } 7200 7201 adev->gds.gws_size = 64; 7202 adev->gds.oa_size = 16; 7203 } 7204 7205 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 7206 u32 bitmap) 7207 { 7208 u32 data; 7209 7210 if (!bitmap) 7211 return; 7212 7213 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7214 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7215 7216 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data); 7217 } 7218 7219 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) 7220 { 7221 u32 data, mask; 7222 7223 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); 7224 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); 7225 7226 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7227 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7228 7229 mask = 
amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); 7230 7231 return (~data) & mask; 7232 } 7233 7234 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 7235 struct amdgpu_cu_info *cu_info) 7236 { 7237 int i, j, k, counter, active_cu_number = 0; 7238 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 7239 unsigned disable_masks[4 * 4]; 7240 7241 if (!adev || !cu_info) 7242 return -EINVAL; 7243 7244 /* 7245 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs 7246 */ 7247 if (adev->gfx.config.max_shader_engines * 7248 adev->gfx.config.max_sh_per_se > 16) 7249 return -EINVAL; 7250 7251 amdgpu_gfx_parse_disable_cu(disable_masks, 7252 adev->gfx.config.max_shader_engines, 7253 adev->gfx.config.max_sh_per_se); 7254 7255 mutex_lock(&adev->grbm_idx_mutex); 7256 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7257 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7258 mask = 1; 7259 ao_bitmap = 0; 7260 counter = 0; 7261 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 7262 gfx_v9_0_set_user_cu_inactive_bitmap( 7263 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); 7264 bitmap = gfx_v9_0_get_cu_active_bitmap(adev); 7265 7266 /* 7267 * The bitmap(and ao_cu_bitmap) in cu_info structure is 7268 * 4x4 size array, and it's usually suitable for Vega 7269 * ASICs which has 4*2 SE/SH layout. 7270 * But for Arcturus, SE/SH layout is changed to 8*1. 7271 * To mostly reduce the impact, we make it compatible 7272 * with current bitmap array as below: 7273 * SE4,SH0 --> bitmap[0][1] 7274 * SE5,SH0 --> bitmap[1][1] 7275 * SE6,SH0 --> bitmap[2][1] 7276 * SE7,SH0 --> bitmap[3][1] 7277 */ 7278 cu_info->bitmap[i % 4][j + i / 4] = bitmap; 7279 7280 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 7281 if (bitmap & mask) { 7282 if (counter < adev->gfx.config.max_cu_per_sh) 7283 ao_bitmap |= mask; 7284 counter ++; 7285 } 7286 mask <<= 1; 7287 } 7288 active_cu_number += counter; 7289 if (i < 2 && j < 2) 7290 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 7291 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; 7292 } 7293 } 7294 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 7295 mutex_unlock(&adev->grbm_idx_mutex); 7296 7297 cu_info->number = active_cu_number; 7298 cu_info->ao_cu_mask = ao_cu_mask; 7299 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 7300 7301 return 0; 7302 } 7303 7304 const struct amdgpu_ip_block_version gfx_v9_0_ip_block = 7305 { 7306 .type = AMD_IP_BLOCK_TYPE_GFX, 7307 .major = 9, 7308 .minor = 0, 7309 .rev = 0, 7310 .funcs = &gfx_v9_0_ip_funcs, 7311 }; 7312
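/*
 * Editorial note: gfx_v9_0_ip_block above is only a descriptor; this file
 * does not register it. A minimal sketch of how the SoC/discovery code is
 * expected to hook it up (amdgpu_device_ip_block_add() is the common helper;
 * the exact call site, e.g. soc15.c or amdgpu_discovery.c, depends on the
 * kernel version and is an assumption here, not shown in this file):
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 *
 * The generic IP dispatch code then drives the amd_ip_funcs callbacks wired
 * up in gfx_v9_0_ip_funcs (early_init, sw_init, hw_init, ... at bring-up and
 * the hw_fini/suspend/resume counterparts on the way down).
 */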