/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)		((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)		((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)		((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)		((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)		((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)		((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)		((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14
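/*
 * Firmware images consumed by this IP block.  MODULE_FIRMWARE() only
 * records the file name in the module info section so that tooling
 * (modinfo, initramfs generators) knows to bundle the blobs; the actual
 * loading happens in gfx_v8_0_init_microcode() below.
 */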
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
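/*
 * Per-VMID GDS register offsets: one {base, size, GWS, OA} tuple for
 * each of the 16 VMIDs.
 */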
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
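/*
 * The "golden" tables below are flat arrays of
 * {register offset, AND mask, OR value} triplets consumed by
 * amdgpu_device_program_register_sequence().  Roughly (a sketch, not
 * the exact helper body):
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~and_mask;
 *	tmp |= or_value;
 *	WREG32(reg, tmp);
 *
 * i.e. the mask selects which bits are replaced by the new value.
 */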
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
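/*
 * Human-readable decodings of the SQ_EDC_INFO SOURCE field, used when
 * reporting SQ EDC interrupts.
 */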
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
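/*
 * gfx_v8_0_init_golden_registers - apply per-ASIC "golden" settings
 *
 * Programs the clockgating init sequence, the tuning values and the
 * common GB/SPI configuration for the detected VI variant.  The
 * Polaris10 case additionally fixes up the ACLK divider via SMC and,
 * on three specific board SKUs, rewrites two configuration bytes over
 * the AtomBIOS i2c channel (a board-specific workaround).
 */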
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
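/*
 * gfx_v8_0_ring_test_ib - sanity-check IB submission
 *
 * Same idea as the ring test above, but exercised through an indirect
 * buffer: seed a writeback slot with 0xCAFEDEAD, submit a WRITE_DATA
 * packet that stores 0xDEADBEEF to it, then wait on the fence and
 * verify the slot.
 */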
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("ib test on ring %d failed\n", ring->idx);
		r = -EINVAL;
	}

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
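/*
 * gfx_v8_0_init_microcode - fetch and validate the CP/RLC firmware
 *
 * Firmware names follow "amdgpu/<chip>_<block>.bin".  On Polaris parts
 * the newer "_2.bin" images are tried first and the legacy names are
 * used as a fallback when they are absent (-ENOENT).  MEC2 firmware is
 * optional: it is skipped entirely on Stoney/Topaz and a load failure
 * elsewhere is tolerated.
 */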
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_VEGAM:
		chip_name = "vegam";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs
	 * was formally released with feature version #46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
		le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
	info->fw = adev->gfx.pfp_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
	info->fw = adev->gfx.me_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
	info->fw = adev->gfx.ce_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
	info->fw = adev->gfx.rlc_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
	info->fw = adev->gfx.mec_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	/* we also need to account for the CP jump table (JT) */
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

	if (amdgpu_sriov_vf(adev)) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
	}

	if (adev->gfx.mec2_fw) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
		info->fw = adev->gfx.mec2_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
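/*
 * cz_init_cp_jump_table - copy the CP jump tables into the RLC bo
 *
 * For Carrizo/Stoney the RLC needs the jump table of every CP engine
 * (CE, PFP, ME, MEC and, on Carrizo, MEC2) packed back to back into
 * the cp_table buffer allocated in gfx_v8_0_rlc_init().
 */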
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}
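/*
 * MEC (compute micro engine) setup: gfx_v8_0_mec_init() claims the
 * compute queues this driver will use and allocates one
 * GFX8_MEC_HPD_SIZE chunk of GTT per compute ring for the hardware
 * queue descriptor / end-of-pipe (HPD EOP) storage.
 */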
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
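/*
 * Register state for the three EDC init dispatches below: each table
 * is a flat array of {SH register offset, value} pairs, consumed two
 * at a time when building the IB in gfx_v8_0_do_edc_gpr_workarounds().
 */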
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
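
/*
 * Carrizo-only EDC workaround: run three throwaway compute dispatches
 * that touch the VGPR and SGPR files, presumably so the GPR banks come
 * up with known ECC state, then enable DED/FED propagation in
 * GB_EDC_MODE and read the EDC counters back to clear them.
 */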
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
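
/*
 * Derive the per-ASIC gfx configuration (shader engines, CUs, caches,
 * FIFO sizes) and the golden GB_ADDR_CONFIG value, then fix up the row
 * size from the memory controller settings.
 */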
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
	case CHIP_VEGAM:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
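
/*
 * Map one compute ring onto an MEC (me, pipe, queue) triple, assign
 * its doorbell and HPD EOP slot, and attach it to the EOP interrupt
 * source.
 */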
static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			     + (ring_id * GFX8_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		   + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		   + ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}

static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
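
/*
 * sw_init: register the CP/SQ interrupt sources, load microcode,
 * create the RLC, MEC and KIQ objects, and initialize the gfx ring
 * plus every enabled compute ring.
 */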
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add_id() for SQ failed: %d\n", r);
		return r;
	}

	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}

static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				      &adev->gfx.rlc.cp_table_gpu_addr,
				      (void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
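
/*
 * Program the GB_TILE_MODE and GB_MACROTILE_MODE tables with the
 * per-ASIC tiling encodings; table entries that are reserved on a
 * given chip are skipped when the registers are written.
 */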
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] =
				(ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
	case CHIP_VEGAM:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset <
		     num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2943 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2944 NUM_BANKS(ADDR_SURF_16_BANK)); 2945 2946 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2947 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2948 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2949 NUM_BANKS(ADDR_SURF_16_BANK)); 2950 2951 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2952 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2953 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2954 NUM_BANKS(ADDR_SURF_8_BANK)); 2955 2956 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2957 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2958 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2959 NUM_BANKS(ADDR_SURF_4_BANK)); 2960 2961 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2962 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2963 2964 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2965 if (reg_offset != 7) 2966 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2967 2968 break; 2969 case CHIP_POLARIS10: 2970 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2971 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2972 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2973 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2974 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2975 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2976 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2977 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2978 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2979 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2980 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2981 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2982 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2983 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2984 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2985 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2986 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2987 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2988 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2989 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2990 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2991 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2992 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2993 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2994 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2995 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2996 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2997 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2998 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2999 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3000 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3001 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3002 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3003 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 3004 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3005 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3006 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3007 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3008 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3009 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3010 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3011 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3012 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3013 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3014 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3015 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3016 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3017 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3018 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3019 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3020 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3021 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3022 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3023 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3024 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3025 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3026 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3027 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3028 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3029 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3030 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3031 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3032 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3033 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3034 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3035 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3036 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3037 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3038 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3039 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3040 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3041 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3042 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3043 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3044 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3045 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3046 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3047 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3048 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3049 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3050 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3051 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3052 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3053 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3054 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3055 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3056 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3057 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3058 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3059 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3060 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3061 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3062 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3063 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3064 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3065 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3066 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3067 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3068 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3069 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3070 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3071 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3072 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3073 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3074 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3075 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3076 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3077 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3078 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3079 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3080 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3081 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3082 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3083 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3084 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3085 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3086 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3087 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3088 modearray[30] = 
(ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3089 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3090 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3091 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3092 3093 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3094 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3095 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3096 NUM_BANKS(ADDR_SURF_16_BANK)); 3097 3098 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3099 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3100 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3101 NUM_BANKS(ADDR_SURF_16_BANK)); 3102 3103 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3104 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3105 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3106 NUM_BANKS(ADDR_SURF_16_BANK)); 3107 3108 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3109 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3110 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3111 NUM_BANKS(ADDR_SURF_16_BANK)); 3112 3113 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3114 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3115 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3116 NUM_BANKS(ADDR_SURF_16_BANK)); 3117 3118 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3119 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3120 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3121 NUM_BANKS(ADDR_SURF_16_BANK)); 3122 3123 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3124 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3125 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3126 NUM_BANKS(ADDR_SURF_16_BANK)); 3127 3128 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3129 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3130 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3131 NUM_BANKS(ADDR_SURF_16_BANK)); 3132 3133 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3134 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3135 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3136 NUM_BANKS(ADDR_SURF_16_BANK)); 3137 3138 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3139 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3140 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3141 NUM_BANKS(ADDR_SURF_16_BANK)); 3142 3143 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3144 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3145 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3146 NUM_BANKS(ADDR_SURF_16_BANK)); 3147 3148 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3149 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3150 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3151 NUM_BANKS(ADDR_SURF_8_BANK)); 3152 3153 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3154 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3155 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3156 NUM_BANKS(ADDR_SURF_4_BANK)); 3157 3158 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3159 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3160 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3161 NUM_BANKS(ADDR_SURF_4_BANK)); 3162 3163 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3164 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3165 3166 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3167 if (reg_offset != 7) 3168 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3169 3170 break; 3171 case CHIP_STONEY: 3172 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3173 PIPE_CONFIG(ADDR_SURF_P2) | 3174 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3175 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3176 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3177 PIPE_CONFIG(ADDR_SURF_P2) | 
3178 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3179 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3180 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3181 PIPE_CONFIG(ADDR_SURF_P2) | 3182 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3183 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3184 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3185 PIPE_CONFIG(ADDR_SURF_P2) | 3186 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3187 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3188 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3189 PIPE_CONFIG(ADDR_SURF_P2) | 3190 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3191 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3192 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3193 PIPE_CONFIG(ADDR_SURF_P2) | 3194 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3195 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3196 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3197 PIPE_CONFIG(ADDR_SURF_P2) | 3198 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3199 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3200 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3201 PIPE_CONFIG(ADDR_SURF_P2)); 3202 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3203 PIPE_CONFIG(ADDR_SURF_P2) | 3204 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3205 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3206 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3207 PIPE_CONFIG(ADDR_SURF_P2) | 3208 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3209 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3210 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3211 PIPE_CONFIG(ADDR_SURF_P2) | 3212 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3213 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3214 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3215 PIPE_CONFIG(ADDR_SURF_P2) | 3216 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3217 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3218 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3219 PIPE_CONFIG(ADDR_SURF_P2) | 3220 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3221 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3222 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3223 PIPE_CONFIG(ADDR_SURF_P2) | 3224 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3225 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3226 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3227 PIPE_CONFIG(ADDR_SURF_P2) | 3228 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3229 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3230 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3231 PIPE_CONFIG(ADDR_SURF_P2) | 3232 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3233 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3234 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3235 PIPE_CONFIG(ADDR_SURF_P2) | 3236 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3237 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3238 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3239 PIPE_CONFIG(ADDR_SURF_P2) | 3240 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3241 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3242 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3243 PIPE_CONFIG(ADDR_SURF_P2) | 3244 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3246 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3247 PIPE_CONFIG(ADDR_SURF_P2) | 3248 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3249 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3250 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3251 PIPE_CONFIG(ADDR_SURF_P2) | 
3252 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3254 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3255 PIPE_CONFIG(ADDR_SURF_P2) | 3256 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3257 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3258 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3259 PIPE_CONFIG(ADDR_SURF_P2) | 3260 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3262 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3263 PIPE_CONFIG(ADDR_SURF_P2) | 3264 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3266 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3267 PIPE_CONFIG(ADDR_SURF_P2) | 3268 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3270 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3271 PIPE_CONFIG(ADDR_SURF_P2) | 3272 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3274 3275 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3276 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3277 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3278 NUM_BANKS(ADDR_SURF_8_BANK)); 3279 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3280 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3281 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3282 NUM_BANKS(ADDR_SURF_8_BANK)); 3283 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3284 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3285 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3286 NUM_BANKS(ADDR_SURF_8_BANK)); 3287 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3288 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3289 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3290 NUM_BANKS(ADDR_SURF_8_BANK)); 3291 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3294 NUM_BANKS(ADDR_SURF_8_BANK)); 3295 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3296 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3297 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3298 NUM_BANKS(ADDR_SURF_8_BANK)); 3299 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3300 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3301 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3302 NUM_BANKS(ADDR_SURF_8_BANK)); 3303 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3306 NUM_BANKS(ADDR_SURF_16_BANK)); 3307 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3308 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3309 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3310 NUM_BANKS(ADDR_SURF_16_BANK)); 3311 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3312 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3313 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3314 NUM_BANKS(ADDR_SURF_16_BANK)); 3315 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3316 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3317 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3318 NUM_BANKS(ADDR_SURF_16_BANK)); 3319 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3320 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3321 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3322 NUM_BANKS(ADDR_SURF_16_BANK)); 3323 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3324 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3325 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3326 NUM_BANKS(ADDR_SURF_16_BANK)); 3327 mod2array[14] 
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3328 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3329 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3330 NUM_BANKS(ADDR_SURF_8_BANK)); 3331 3332 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3333 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3334 reg_offset != 23) 3335 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3336 3337 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3338 if (reg_offset != 7) 3339 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3340 3341 break; 3342 default: 3343 dev_warn(adev->dev, 3344 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3345 adev->asic_type); 3346 3347 case CHIP_CARRIZO: 3348 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3349 PIPE_CONFIG(ADDR_SURF_P2) | 3350 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3351 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3352 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3353 PIPE_CONFIG(ADDR_SURF_P2) | 3354 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3355 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3356 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3357 PIPE_CONFIG(ADDR_SURF_P2) | 3358 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3359 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3360 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3361 PIPE_CONFIG(ADDR_SURF_P2) | 3362 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3363 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3364 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3365 PIPE_CONFIG(ADDR_SURF_P2) | 3366 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3367 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3368 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3369 PIPE_CONFIG(ADDR_SURF_P2) | 3370 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3371 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3372 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3373 PIPE_CONFIG(ADDR_SURF_P2) | 3374 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3375 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3376 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3377 PIPE_CONFIG(ADDR_SURF_P2)); 3378 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3379 PIPE_CONFIG(ADDR_SURF_P2) | 3380 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3381 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3382 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3383 PIPE_CONFIG(ADDR_SURF_P2) | 3384 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3385 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3386 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3387 PIPE_CONFIG(ADDR_SURF_P2) | 3388 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3389 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3390 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3391 PIPE_CONFIG(ADDR_SURF_P2) | 3392 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3393 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3394 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3395 PIPE_CONFIG(ADDR_SURF_P2) | 3396 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3397 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3398 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3399 PIPE_CONFIG(ADDR_SURF_P2) | 3400 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3401 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3402 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3403 PIPE_CONFIG(ADDR_SURF_P2) | 3404 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3405 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3406 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3407 PIPE_CONFIG(ADDR_SURF_P2) | 3408 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3409 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3410 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3411 PIPE_CONFIG(ADDR_SURF_P2) | 3412 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3413 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3414 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3415 PIPE_CONFIG(ADDR_SURF_P2) | 3416 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3417 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3418 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3419 PIPE_CONFIG(ADDR_SURF_P2) | 3420 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3421 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3422 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3423 PIPE_CONFIG(ADDR_SURF_P2) | 3424 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3425 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3426 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3427 PIPE_CONFIG(ADDR_SURF_P2) | 3428 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3429 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3430 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3431 PIPE_CONFIG(ADDR_SURF_P2) | 3432 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3433 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3434 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3435 PIPE_CONFIG(ADDR_SURF_P2) | 3436 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3438 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3439 PIPE_CONFIG(ADDR_SURF_P2) | 3440 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3442 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3443 PIPE_CONFIG(ADDR_SURF_P2) | 3444 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3446 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3447 PIPE_CONFIG(ADDR_SURF_P2) | 3448 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3449 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3450 3451 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3452 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3453 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3454 NUM_BANKS(ADDR_SURF_8_BANK)); 3455 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3456 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3457 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3458 NUM_BANKS(ADDR_SURF_8_BANK)); 3459 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3460 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3461 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3462 NUM_BANKS(ADDR_SURF_8_BANK)); 3463 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3466 NUM_BANKS(ADDR_SURF_8_BANK)); 3467 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3468 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3469 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3470 NUM_BANKS(ADDR_SURF_8_BANK)); 3471 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3472 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3473 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3474 NUM_BANKS(ADDR_SURF_8_BANK)); 3475 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3478 NUM_BANKS(ADDR_SURF_8_BANK)); 3479 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 
3480 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3481 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3482 NUM_BANKS(ADDR_SURF_16_BANK)); 3483 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3484 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3485 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3486 NUM_BANKS(ADDR_SURF_16_BANK)); 3487 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3488 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3489 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3490 NUM_BANKS(ADDR_SURF_16_BANK)); 3491 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3492 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3493 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3494 NUM_BANKS(ADDR_SURF_16_BANK)); 3495 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3496 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3497 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3498 NUM_BANKS(ADDR_SURF_16_BANK)); 3499 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3500 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3501 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3502 NUM_BANKS(ADDR_SURF_16_BANK)); 3503 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3504 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3505 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3506 NUM_BANKS(ADDR_SURF_8_BANK)); 3507 3508 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3509 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3510 reg_offset != 23) 3511 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3512 3513 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3514 if (reg_offset != 7) 3515 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3516 3517 break; 3518 } 3519 } 3520 3521 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, 3522 u32 se_num, u32 sh_num, u32 instance) 3523 { 3524 u32 data; 3525 3526 if (instance == 0xffffffff) 3527 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3528 else 3529 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 3530 3531 if (se_num == 0xffffffff) 3532 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3533 else 3534 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3535 3536 if (sh_num == 0xffffffff) 3537 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3538 else 3539 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3540 3541 WREG32(mmGRBM_GFX_INDEX, data); 3542 } 3543 3544 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev, 3545 u32 me, u32 pipe, u32 q) 3546 { 3547 vi_srbm_select(adev, me, pipe, q, 0); 3548 } 3549 3550 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3551 { 3552 u32 data, mask; 3553 3554 data = RREG32(mmCC_RB_BACKEND_DISABLE) | 3555 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3556 3557 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); 3558 3559 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 3560 adev->gfx.config.max_sh_per_se); 3561 3562 return (~data) & mask; 3563 } 3564 3565 static void 3566 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) 3567 { 3568 switch (adev->asic_type) { 3569 case CHIP_FIJI: 3570 case CHIP_VEGAM: 3571 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3572 RB_XSEL2(1) | PKR_MAP(2) | 3573 PKR_XSEL(1) | PKR_YSEL(1) | 3574 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); 3575 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | 3576 SE_PAIR_YSEL(2); 3577 break; 3578 case CHIP_TONGA: 3579 case 
CHIP_POLARIS10: 3580 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3581 SE_XSEL(1) | SE_YSEL(1); 3582 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | 3583 SE_PAIR_YSEL(2); 3584 break; 3585 case CHIP_TOPAZ: 3586 case CHIP_CARRIZO: 3587 *rconf |= RB_MAP_PKR0(2); 3588 *rconf1 |= 0x0; 3589 break; 3590 case CHIP_POLARIS11: 3591 case CHIP_POLARIS12: 3592 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3593 SE_XSEL(1) | SE_YSEL(1); 3594 *rconf1 |= 0x0; 3595 break; 3596 case CHIP_STONEY: 3597 *rconf |= 0x0; 3598 *rconf1 |= 0x0; 3599 break; 3600 default: 3601 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 3602 break; 3603 } 3604 } 3605 3606 static void 3607 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, 3608 u32 raster_config, u32 raster_config_1, 3609 unsigned rb_mask, unsigned num_rb) 3610 { 3611 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); 3612 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); 3613 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); 3614 unsigned rb_per_se = num_rb / num_se; 3615 unsigned se_mask[4]; 3616 unsigned se; 3617 3618 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3619 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3620 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 3621 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3622 3623 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); 3624 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); 3625 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); 3626 3627 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3628 (!se_mask[2] && !se_mask[3]))) { 3629 raster_config_1 &= ~SE_PAIR_MAP_MASK; 3630 3631 if (!se_mask[0] && !se_mask[1]) { 3632 raster_config_1 |= 3633 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); 3634 } else { 3635 raster_config_1 |= 3636 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); 3637 } 3638 } 3639 3640 for (se = 0; se < num_se; se++) { 3641 unsigned raster_config_se = raster_config; 3642 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3643 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3644 int idx = (se / 2) * 2; 3645 3646 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3647 raster_config_se &= ~SE_MAP_MASK; 3648 3649 if (!se_mask[idx]) { 3650 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); 3651 } else { 3652 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); 3653 } 3654 } 3655 3656 pkr0_mask &= rb_mask; 3657 pkr1_mask &= rb_mask; 3658 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3659 raster_config_se &= ~PKR_MAP_MASK; 3660 3661 if (!pkr0_mask) { 3662 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); 3663 } else { 3664 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); 3665 } 3666 } 3667 3668 if (rb_per_se >= 2) { 3669 unsigned rb0_mask = 1 << (se * rb_per_se); 3670 unsigned rb1_mask = rb0_mask << 1; 3671 3672 rb0_mask &= rb_mask; 3673 rb1_mask &= rb_mask; 3674 if (!rb0_mask || !rb1_mask) { 3675 raster_config_se &= ~RB_MAP_PKR0_MASK; 3676 3677 if (!rb0_mask) { 3678 raster_config_se |= 3679 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); 3680 } else { 3681 raster_config_se |= 3682 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); 3683 } 3684 } 3685 3686 if (rb_per_se > 2) { 3687 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3688 rb1_mask = rb0_mask << 1; 3689 rb0_mask &= rb_mask; 3690 rb1_mask &= rb_mask; 3691 if (!rb0_mask || !rb1_mask) { 3692 raster_config_se &= ~RB_MAP_PKR1_MASK; 3693 3694 if (!rb0_mask) { 3695 raster_config_se |= 3696 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); 3697 } else { 
3698 raster_config_se |=
3699 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3700 }
3701 }
3702 }
3703 }
3704
3705 /* GRBM_GFX_INDEX has a different offset on VI */
3706 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3707 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3708 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3709 }
3710
3711 /* GRBM_GFX_INDEX has a different offset on VI */
3712 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3713 }
3714
3715 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3716 {
3717 int i, j;
3718 u32 data;
3719 u32 raster_config = 0, raster_config_1 = 0;
3720 u32 active_rbs = 0;
3721 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3722 adev->gfx.config.max_sh_per_se;
3723 unsigned num_rb_pipes;
3724
3725 mutex_lock(&adev->grbm_idx_mutex);
3726 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3727 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3728 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3729 data = gfx_v8_0_get_rb_active_bitmap(adev);
3730 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3731 rb_bitmap_width_per_sh);
3732 }
3733 }
3734 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3735
3736 adev->gfx.config.backend_enable_mask = active_rbs;
3737 adev->gfx.config.num_rbs = hweight32(active_rbs);
3738
3739 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3740 adev->gfx.config.max_shader_engines, 16);
3741
3742 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3743
3744 if (!adev->gfx.config.backend_enable_mask ||
3745 adev->gfx.config.num_rbs >= num_rb_pipes) {
3746 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3747 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3748 } else {
3749 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3750 adev->gfx.config.backend_enable_mask,
3751 num_rb_pipes);
3752 }
3753
3754 /* cache the values for userspace */
3755 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3756 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3757 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3758 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3759 RREG32(mmCC_RB_BACKEND_DISABLE);
3760 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3761 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3762 adev->gfx.config.rb_config[i][j].raster_config =
3763 RREG32(mmPA_SC_RASTER_CONFIG);
3764 adev->gfx.config.rb_config[i][j].raster_config_1 =
3765 RREG32(mmPA_SC_RASTER_CONFIG_1);
3766 }
3767 }
3768 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3769 mutex_unlock(&adev->grbm_idx_mutex);
3770 }
3771
3772 /**
3773 * gfx_v8_0_init_compute_vmid - init compute vmid
3774 *
3775 * @adev: amdgpu_device pointer
3776 *
3777 * Initialize compute vmid sh_mem registers
3778 *
3779 */
3780 #define DEFAULT_SH_MEM_BASES (0x6000)
3781 #define FIRST_COMPUTE_VMID (8)
3782 #define LAST_COMPUTE_VMID (16)
3783 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3784 {
3785 int i;
3786 uint32_t sh_mem_config;
3787 uint32_t sh_mem_bases;
3788
3789 /*
3790 * Configure apertures:
3791 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3792 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3793 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3794 */
/* each 16-bit half of SH_MEM_BASES selects address bits 63:48 of an aperture base, so 0x6000 yields the 0x6000'0000'0000'0000 ranges above */
3795 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3796
3797 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3798 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3799
SH_MEM_ALIGNMENT_MODE_UNALIGNED << 3800 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | 3801 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | 3802 SH_MEM_CONFIG__PRIVATE_ATC_MASK; 3803 3804 mutex_lock(&adev->srbm_mutex); 3805 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 3806 vi_srbm_select(adev, 0, 0, 0, i); 3807 /* CP and shaders */ 3808 WREG32(mmSH_MEM_CONFIG, sh_mem_config); 3809 WREG32(mmSH_MEM_APE1_BASE, 1); 3810 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3811 WREG32(mmSH_MEM_BASES, sh_mem_bases); 3812 } 3813 vi_srbm_select(adev, 0, 0, 0, 0); 3814 mutex_unlock(&adev->srbm_mutex); 3815 } 3816 3817 static void gfx_v8_0_config_init(struct amdgpu_device *adev) 3818 { 3819 switch (adev->asic_type) { 3820 default: 3821 adev->gfx.config.double_offchip_lds_buf = 1; 3822 break; 3823 case CHIP_CARRIZO: 3824 case CHIP_STONEY: 3825 adev->gfx.config.double_offchip_lds_buf = 0; 3826 break; 3827 } 3828 } 3829 3830 static void gfx_v8_0_constants_init(struct amdgpu_device *adev) 3831 { 3832 u32 tmp, sh_static_mem_cfg; 3833 int i; 3834 3835 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF); 3836 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3837 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3838 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); 3839 3840 gfx_v8_0_tiling_mode_table_init(adev); 3841 gfx_v8_0_setup_rb(adev); 3842 gfx_v8_0_get_cu_info(adev); 3843 gfx_v8_0_config_init(adev); 3844 3845 /* XXX SH_MEM regs */ 3846 /* where to put LDS, scratch, GPUVM in FSA64 space */ 3847 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG, 3848 SWIZZLE_ENABLE, 1); 3849 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3850 ELEMENT_SIZE, 1); 3851 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3852 INDEX_STRIDE, 3); 3853 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); 3854 3855 mutex_lock(&adev->srbm_mutex); 3856 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { 3857 vi_srbm_select(adev, 0, 0, 0, i); 3858 /* CP and shaders */ 3859 if (i == 0) { 3860 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC); 3861 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3862 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3863 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3864 WREG32(mmSH_MEM_CONFIG, tmp); 3865 WREG32(mmSH_MEM_BASES, 0); 3866 } else { 3867 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC); 3868 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3869 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3870 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3871 WREG32(mmSH_MEM_CONFIG, tmp); 3872 tmp = adev->gmc.shared_aperture_start >> 48; 3873 WREG32(mmSH_MEM_BASES, tmp); 3874 } 3875 3876 WREG32(mmSH_MEM_APE1_BASE, 1); 3877 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3878 } 3879 vi_srbm_select(adev, 0, 0, 0, 0); 3880 mutex_unlock(&adev->srbm_mutex); 3881 3882 gfx_v8_0_init_compute_vmid(adev); 3883 3884 mutex_lock(&adev->grbm_idx_mutex); 3885 /* 3886 * making sure that the following register writes will be broadcasted 3887 * to all the shaders 3888 */ 3889 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3890 3891 WREG32(mmPA_SC_FIFO_SIZE, 3892 (adev->gfx.config.sc_prim_fifo_size_frontend << 3893 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | 3894 (adev->gfx.config.sc_prim_fifo_size_backend << 3895 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | 3896 (adev->gfx.config.sc_hiz_tile_fifo_size << 3897 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | 3898 
(adev->gfx.config.sc_earlyz_tile_fifo_size << 3899 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); 3900 3901 tmp = RREG32(mmSPI_ARB_PRIORITY); 3902 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); 3903 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); 3904 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); 3905 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2); 3906 WREG32(mmSPI_ARB_PRIORITY, tmp); 3907 3908 mutex_unlock(&adev->grbm_idx_mutex); 3909 3910 } 3911 3912 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 3913 { 3914 u32 i, j, k; 3915 u32 mask; 3916 3917 mutex_lock(&adev->grbm_idx_mutex); 3918 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3919 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3920 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3921 for (k = 0; k < adev->usec_timeout; k++) { 3922 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) 3923 break; 3924 udelay(1); 3925 } 3926 if (k == adev->usec_timeout) { 3927 gfx_v8_0_select_se_sh(adev, 0xffffffff, 3928 0xffffffff, 0xffffffff); 3929 mutex_unlock(&adev->grbm_idx_mutex); 3930 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 3931 i, j); 3932 return; 3933 } 3934 } 3935 } 3936 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3937 mutex_unlock(&adev->grbm_idx_mutex); 3938 3939 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 3940 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 3941 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 3942 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 3943 for (k = 0; k < adev->usec_timeout; k++) { 3944 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 3945 break; 3946 udelay(1); 3947 } 3948 } 3949 3950 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 3951 bool enable) 3952 { 3953 u32 tmp = RREG32(mmCP_INT_CNTL_RING0); 3954 3955 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 3956 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 3957 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 3958 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 
1 : 0);
3959
3960 WREG32(mmCP_INT_CNTL_RING0, tmp);
3961 }
3962
3963 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3964 {
3965 /* csib */
3966 WREG32(mmRLC_CSIB_ADDR_HI,
3967 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3968 WREG32(mmRLC_CSIB_ADDR_LO,
3969 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3970 WREG32(mmRLC_CSIB_LENGTH,
3971 adev->gfx.rlc.clear_state_size);
3972 }
3973
3974 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3975 int ind_offset,
3976 int list_size,
3977 int *unique_indices,
3978 int *indices_count,
3979 int max_indices,
3980 int *ind_start_offsets,
3981 int *offset_count,
3982 int max_offset)
3983 {
3984 int indices;
3985 bool new_entry = true;
3986
3987 for (; ind_offset < list_size; ind_offset++) {
3988
3989 if (new_entry) {
3990 new_entry = false;
3991 ind_start_offsets[*offset_count] = ind_offset;
3992 *offset_count = *offset_count + 1;
3993 BUG_ON(*offset_count >= max_offset);
3994 }
3995
3996 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3997 new_entry = true;
3998 continue;
3999 }
4000
4001 ind_offset += 2;
4002
4003 /* look for the matching index */
4004 for (indices = 0;
4005 indices < *indices_count;
4006 indices++) {
4007 if (unique_indices[indices] ==
4008 register_list_format[ind_offset])
4009 break;
4010 }
4011
4012 if (indices >= *indices_count) {
4013 unique_indices[*indices_count] =
4014 register_list_format[ind_offset];
4015 indices = *indices_count;
4016 *indices_count = *indices_count + 1;
4017 BUG_ON(*indices_count >= max_indices);
4018 }
4019
4020 register_list_format[ind_offset] = indices;
4021 }
4022 }
4023
4024 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4025 {
4026 int i, temp, data;
4027 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4028 int indices_count = 0;
4029 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4030 int offset_count = 0;
4031
4032 int list_size;
4033 unsigned int *register_list_format =
4034 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4035 if (!register_list_format)
4036 return -ENOMEM;
4037 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4038 adev->gfx.rlc.reg_list_format_size_bytes);
4039
4040 gfx_v8_0_parse_ind_reg_list(register_list_format,
4041 RLC_FormatDirectRegListLength,
4042 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4043 unique_indices,
4044 &indices_count,
4045 ARRAY_SIZE(unique_indices),
4046 indirect_start_offsets,
4047 &offset_count,
4048 ARRAY_SIZE(indirect_start_offsets));
4049
4050 /* save and restore list */
4051 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4052
4053 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4054 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4055 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4056
4057 /* indirect list */
4058 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4059 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4060 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4061
4062 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4063 list_size = list_size >> 1;
4064 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4065 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4066
4067 /* starting offsets */
4068 WREG32(mmRLC_GPM_SCRATCH_ADDR,
4069 adev->gfx.rlc.starting_offsets_start);
4070 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4071 WREG32(mmRLC_GPM_SCRATCH_DATA,
4072 indirect_start_offsets[i]);
4073
4074 /* unique indices */
4075 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4076 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4077 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4078 if (unique_indices[i] != 0) {
4079 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4080 WREG32(data + i, unique_indices[i] >> 20);
4081 }
4082 }
4083 kfree(register_list_format);
4084
4085 return 0;
4086 }
4087
4088 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4089 {
4090 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4091 }
4092
4093 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4094 {
4095 uint32_t data;
4096
4097 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4098
4099 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4100 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4101 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4102 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4103 WREG32(mmRLC_PG_DELAY, data);
4104
4105 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4106 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4107
4108 }
4109
4110 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4111 bool enable)
4112 {
4113 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4114 }
4115
4116 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4117 bool enable)
4118 {
4119 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4120 }
4121
4122 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4123 {
4124 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4125 }
4126
4127 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4128 {
4129 if ((adev->asic_type == CHIP_CARRIZO) ||
4130 (adev->asic_type == CHIP_STONEY)) {
4131 gfx_v8_0_init_csb(adev);
4132 gfx_v8_0_init_save_restore_list(adev);
4133 gfx_v8_0_enable_save_restore_machine(adev);
4134 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4135 gfx_v8_0_init_power_gating(adev);
4136 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4137 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4138 (adev->asic_type == CHIP_POLARIS12) ||
4139 (adev->asic_type == CHIP_VEGAM)) {
4140 gfx_v8_0_init_csb(adev);
4141 gfx_v8_0_init_save_restore_list(adev);
4142 gfx_v8_0_enable_save_restore_machine(adev);
4143 gfx_v8_0_init_power_gating(adev);
4144 }
4145
4146 }
4147
4148 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4149 {
4150 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4151
4152 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4153 gfx_v8_0_wait_for_rlc_serdes(adev);
4154 }
4155
4156 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4157 {
4158 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4159 udelay(50);
4160
4161 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4162 udelay(50);
4163 }
4164
4165 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4166 {
4167 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4168
4169 /* APUs such as Carrizo enable the CP interrupt only after the CP is initialized */
4170 if (!(adev->flags & AMD_IS_APU))
4171 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4172
4173 udelay(50);
4174 }
4175
4176 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4177 {
4178 gfx_v8_0_rlc_stop(adev);
4179 gfx_v8_0_rlc_reset(adev);
4180 gfx_v8_0_init_pg(adev);
4181 gfx_v8_0_rlc_start(adev);
4182
4183 return 0;
4184 }
4185
4186 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev,
bool enable) 4187 { 4188 int i; 4189 u32 tmp = RREG32(mmCP_ME_CNTL); 4190 4191 if (enable) { 4192 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0); 4193 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0); 4194 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0); 4195 } else { 4196 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); 4197 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); 4198 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); 4199 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 4200 adev->gfx.gfx_ring[i].ready = false; 4201 } 4202 WREG32(mmCP_ME_CNTL, tmp); 4203 udelay(50); 4204 } 4205 4206 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev) 4207 { 4208 u32 count = 0; 4209 const struct cs_section_def *sect = NULL; 4210 const struct cs_extent_def *ext = NULL; 4211 4212 /* begin clear state */ 4213 count += 2; 4214 /* context control state */ 4215 count += 3; 4216 4217 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4218 for (ext = sect->section; ext->extent != NULL; ++ext) { 4219 if (sect->id == SECT_CONTEXT) 4220 count += 2 + ext->reg_count; 4221 else 4222 return 0; 4223 } 4224 } 4225 /* pa_sc_raster_config/pa_sc_raster_config1 */ 4226 count += 4; 4227 /* end clear state */ 4228 count += 2; 4229 /* clear state */ 4230 count += 2; 4231 4232 return count; 4233 } 4234 4235 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) 4236 { 4237 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 4238 const struct cs_section_def *sect = NULL; 4239 const struct cs_extent_def *ext = NULL; 4240 int r, i; 4241 4242 /* init the CP */ 4243 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 4244 WREG32(mmCP_ENDIAN_SWAP, 0); 4245 WREG32(mmCP_DEVICE_ID, 1); 4246 4247 gfx_v8_0_cp_gfx_enable(adev, true); 4248 4249 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); 4250 if (r) { 4251 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 4252 return r; 4253 } 4254 4255 /* clear state buffer */ 4256 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4257 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 4258 4259 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4260 amdgpu_ring_write(ring, 0x80000000); 4261 amdgpu_ring_write(ring, 0x80000000); 4262 4263 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4264 for (ext = sect->section; ext->extent != NULL; ++ext) { 4265 if (sect->id == SECT_CONTEXT) { 4266 amdgpu_ring_write(ring, 4267 PACKET3(PACKET3_SET_CONTEXT_REG, 4268 ext->reg_count)); 4269 amdgpu_ring_write(ring, 4270 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 4271 for (i = 0; i < ext->reg_count; i++) 4272 amdgpu_ring_write(ring, ext->extent[i]); 4273 } 4274 } 4275 } 4276 4277 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 4278 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 4279 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config); 4280 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1); 4281 4282 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4283 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 4284 4285 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 4286 amdgpu_ring_write(ring, 0); 4287 4288 /* init the CE partitions */ 4289 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 4290 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 4291 amdgpu_ring_write(ring, 0x8000); 4292 amdgpu_ring_write(ring, 0x8000); 4293 4294 amdgpu_ring_commit(ring); 4295 4296 return 0; 4297 } 
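/*
 * For reference, a worked breakdown of the dword budget reserved above
 * (derived from gfx_v8_0_get_csb_size() and gfx_v8_0_cp_gfx_start();
 * illustrative only, not an additional hardware requirement):
 *
 *   begin-clear-state preamble           2 dwords
 *   CONTEXT_CONTROL                      3 dwords
 *   each SECT_CONTEXT extent             2 + reg_count dwords
 *   PA_SC_RASTER_CONFIG/_1 pair          4 dwords
 *   end-clear-state preamble             2 dwords
 *   CLEAR_STATE                          2 dwords
 *
 * The extra "+ 4" passed to amdgpu_ring_alloc() covers the trailing
 * SET_BASE packet that initializes the CE partitions: one header plus
 * the base index and the two 0x8000 partition sizes.
 */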
4298 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4299 {
4300 u32 tmp;
4301 /* no gfx doorbells on iceland */
4302 if (adev->asic_type == CHIP_TOPAZ)
4303 return;
4304
4305 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4306
4307 if (ring->use_doorbell) {
4308 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4309 DOORBELL_OFFSET, ring->doorbell_index);
4310 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4311 DOORBELL_HIT, 0);
4312 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4313 DOORBELL_EN, 1);
4314 } else {
4315 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4316 }
4317
4318 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4319
4320 if (adev->flags & AMD_IS_APU)
4321 return;
4322
4323 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4324 DOORBELL_RANGE_LOWER,
4325 AMDGPU_DOORBELL_GFX_RING0);
4326 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4327
4328 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4329 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4330 }
4331
4332 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4333 {
4334 struct amdgpu_ring *ring;
4335 u32 tmp;
4336 u32 rb_bufsz;
4337 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4338 int r;
4339
4340 /* Set the write pointer delay */
4341 WREG32(mmCP_RB_WPTR_DELAY, 0);
4342
4343 /* set the RB to use vmid 0 */
4344 WREG32(mmCP_RB_VMID, 0);
4345
4346 /* Set ring buffer size */
4347 ring = &adev->gfx.gfx_ring[0];
4348 rb_bufsz = order_base_2(ring->ring_size / 8);
4349 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4350 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4351 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4352 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4353 #ifdef __BIG_ENDIAN
4354 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4355 #endif
4356 WREG32(mmCP_RB0_CNTL, tmp);
4357
4358 /* Initialize the ring buffer's read and write pointers */
4359 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4360 ring->wptr = 0;
4361 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4362
4363 /* set the wb address whether it's enabled or not */
4364 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4365 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4366 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4367
4368 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4369 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4370 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4371 mdelay(1);
4372 WREG32(mmCP_RB0_CNTL, tmp);
4373
4374 rb_addr = ring->gpu_addr >> 8;
4375 WREG32(mmCP_RB0_BASE, rb_addr);
4376 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4377
4378 gfx_v8_0_set_cpg_door_bell(adev, ring);
4379 /* start the ring */
4380 amdgpu_ring_clear_ring(ring);
4381 gfx_v8_0_cp_gfx_start(adev);
4382 ring->ready = true;
4383 r = amdgpu_ring_test_ring(ring);
4384 if (r)
4385 ring->ready = false;
4386
4387 return r;
4388 }
4389
4390 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4391 {
4392 int i;
4393
4394 if (enable) {
4395 WREG32(mmCP_MEC_CNTL, 0);
4396 } else {
4397 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4398 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4399 adev->gfx.compute_ring[i].ready = false;
4400 adev->gfx.kiq.ring.ready = false;
4401 }
4402 udelay(50);
4403 }
4404
4405 /* KIQ functions */
4406 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4407 {
4408
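/*
 * For illustration (an assumption drawn only from the two-step write
 * below, not from hardware documentation): the low byte of
 * RLC_CP_SCHEDULERS encodes the queue as (me << 5) | (pipe << 3) | queue,
 * so me = 1, pipe = 0, queue = 0 yields 0x20; the follow-up write then
 * ORs in 0x80, presumably marking the selection valid.
 */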
uint32_t tmp;
4409 struct amdgpu_device *adev = ring->adev;
4410
4411 /* tell RLC which queue is the KIQ */
4412 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4413 tmp &= 0xffffff00;
4414 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4415 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4416 tmp |= 0x80;
4417 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4418 }
4419
4420 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4421 {
4422 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4423 uint64_t queue_mask = 0;
4424 int r, i;
4425
4426 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4427 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4428 continue;
4429
4430 /* This situation may be hit in the future if a new HW
4431 * generation exposes more than 64 queues. If so, the
4432 * definition of queue_mask needs updating */
4433 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4434 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4435 break;
4436 }
4437
4438 queue_mask |= (1ull << i);
4439 }
4440
4441 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4442 if (r) {
4443 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4444 return r;
4445 }
4446 /* set resources */
4447 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4448 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4449 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4450 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4451 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4452 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4453 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4454 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4455 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4456 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4457 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4458 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4459
4460 /* map queues */
4461 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4462 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
4463 amdgpu_ring_write(kiq_ring,
4464 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4465 amdgpu_ring_write(kiq_ring,
4466 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4467 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4468 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4469 PACKET3_MAP_QUEUES_ME(ring->me == 1 ?
0 : 1)); /* doorbell */ 4470 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 4471 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 4472 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 4473 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4474 } 4475 4476 r = amdgpu_ring_test_ring(kiq_ring); 4477 if (r) { 4478 DRM_ERROR("KCQ enable failed\n"); 4479 kiq_ring->ready = false; 4480 } 4481 return r; 4482 } 4483 4484 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) 4485 { 4486 int i, r = 0; 4487 4488 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 4489 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); 4490 for (i = 0; i < adev->usec_timeout; i++) { 4491 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 4492 break; 4493 udelay(1); 4494 } 4495 if (i == adev->usec_timeout) 4496 r = -ETIMEDOUT; 4497 } 4498 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 4499 WREG32(mmCP_HQD_PQ_RPTR, 0); 4500 WREG32(mmCP_HQD_PQ_WPTR, 0); 4501 4502 return r; 4503 } 4504 4505 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) 4506 { 4507 struct amdgpu_device *adev = ring->adev; 4508 struct vi_mqd *mqd = ring->mqd_ptr; 4509 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4510 uint32_t tmp; 4511 4512 mqd->header = 0xC0310800; 4513 mqd->compute_pipelinestat_enable = 0x00000001; 4514 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4515 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4516 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4517 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4518 mqd->compute_misc_reserved = 0x00000003; 4519 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr 4520 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4521 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr 4522 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4523 eop_base_addr = ring->eop_gpu_addr >> 8; 4524 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4525 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4526 4527 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4528 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4529 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4530 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); 4531 4532 mqd->cp_hqd_eop_control = tmp; 4533 4534 /* enable doorbell? */ 4535 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), 4536 CP_HQD_PQ_DOORBELL_CONTROL, 4537 DOORBELL_EN, 4538 ring->use_doorbell ? 
1 : 0);
4539
4540 mqd->cp_hqd_pq_doorbell_control = tmp;
4541
4542 /* set the pointer to the MQD */
4543 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4544 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4545
4546 /* set MQD vmid to 0 */
4547 tmp = RREG32(mmCP_MQD_CONTROL);
4548 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4549 mqd->cp_mqd_control = tmp;
4550
4551 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4552 hqd_gpu_addr = ring->gpu_addr >> 8;
4553 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4554 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4555
4556 /* set up the HQD, this is similar to CP_RB0_CNTL */
4557 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4558 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4559 (order_base_2(ring->ring_size / 4) - 1));
4560 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4561 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4562 #ifdef __BIG_ENDIAN
4563 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4564 #endif
4565 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4566 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4567 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4568 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4569 mqd->cp_hqd_pq_control = tmp;
4570
4571 /* set the wb address whether it's enabled or not */
4572 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4573 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4574 mqd->cp_hqd_pq_rptr_report_addr_hi =
4575 upper_32_bits(wb_gpu_addr) & 0xffff;
4576
4577 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4578 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4579 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4580 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4581
4582 tmp = 0;
4583 /* enable the doorbell if requested */
4584 if (ring->use_doorbell) {
4585 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4586 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4587 DOORBELL_OFFSET, ring->doorbell_index);
4588
4589 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4590 DOORBELL_EN, 1);
4591 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4592 DOORBELL_SOURCE, 0);
4593 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4594 DOORBELL_HIT, 0);
4595 }
4596
4597 mqd->cp_hqd_pq_doorbell_control = tmp;
4598
4599 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4600 ring->wptr = 0;
4601 mqd->cp_hqd_pq_wptr = ring->wptr;
4602 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4603
4604 /* set the vmid for the queue */
4605 mqd->cp_hqd_vmid = 0;
4606
4607 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4608 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4609 mqd->cp_hqd_persistent_state = tmp;
4610
4611 /* set MTYPE */
4612 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4613 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4614 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4615 mqd->cp_hqd_ib_control = tmp;
4616
4617 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4618 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4619 mqd->cp_hqd_iq_timer = tmp;
4620
4621 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4622 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4623 mqd->cp_hqd_ctx_save_control = tmp;
4624
4625 /* defaults */
4626 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4627 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
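/*
 * The EOP read/write pointers captured just above (and the registers
 * captured below) are snapshots of the hardware's current state.
 * gfx_v8_0_mqd_commit() later writes this saved state back, but it
 * deliberately leaves CP_HQD_EOP_RPTR/WPTR untouched on Tonga -- see
 * the errata comment in that function.
 */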
4628 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); 4629 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); 4630 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); 4631 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); 4632 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); 4633 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); 4634 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); 4635 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); 4636 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); 4637 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); 4638 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); 4639 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); 4640 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); 4641 4642 /* activate the queue */ 4643 mqd->cp_hqd_active = 1; 4644 4645 return 0; 4646 } 4647 4648 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 4649 struct vi_mqd *mqd) 4650 { 4651 uint32_t mqd_reg; 4652 uint32_t *mqd_data; 4653 4654 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ 4655 mqd_data = &mqd->cp_mqd_base_addr_lo; 4656 4657 /* disable wptr polling */ 4658 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); 4659 4660 /* program all HQD registers */ 4661 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++) 4662 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4663 4664 /* Tonga errata: EOP RPTR/WPTR should be left unmodified. 4665 * This is safe since EOP RPTR==WPTR for any inactive HQD 4666 * on ASICs that do not support context-save. 4667 * EOP writes/reads can start anywhere in the ring. 4668 */ 4669 if (adev->asic_type != CHIP_TONGA) { 4670 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); 4671 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); 4672 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); 4673 } 4674 4675 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) 4676 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4677 4678 /* activate the HQD */ 4679 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) 4680 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4681 4682 return 0; 4683 } 4684 4685 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) 4686 { 4687 struct amdgpu_device *adev = ring->adev; 4688 struct vi_mqd *mqd = ring->mqd_ptr; 4689 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 4690 4691 gfx_v8_0_kiq_setting(ring); 4692 4693 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4694 /* reset MQD to a clean status */ 4695 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4696 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4697 4698 /* reset ring buffer */ 4699 ring->wptr = 0; 4700 amdgpu_ring_clear_ring(ring); 4701 mutex_lock(&adev->srbm_mutex); 4702 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4703 gfx_v8_0_mqd_commit(adev, mqd); 4704 vi_srbm_select(adev, 0, 0, 0, 0); 4705 mutex_unlock(&adev->srbm_mutex); 4706 } else { 4707 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4708 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4709 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4710 mutex_lock(&adev->srbm_mutex); 4711 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4712 gfx_v8_0_mqd_init(ring); 4713 gfx_v8_0_mqd_commit(adev, mqd); 4714 vi_srbm_select(adev, 0, 0, 0, 0); 4715 mutex_unlock(&adev->srbm_mutex); 4716 4717 if 
(adev->gfx.mec.mqd_backup[mqd_idx]) 4718 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4719 } 4720 4721 return 0; 4722 } 4723 4724 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) 4725 { 4726 struct amdgpu_device *adev = ring->adev; 4727 struct vi_mqd *mqd = ring->mqd_ptr; 4728 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4729 4730 if (!adev->in_gpu_reset && !adev->in_suspend) { 4731 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4732 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4733 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4734 mutex_lock(&adev->srbm_mutex); 4735 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4736 gfx_v8_0_mqd_init(ring); 4737 vi_srbm_select(adev, 0, 0, 0, 0); 4738 mutex_unlock(&adev->srbm_mutex); 4739 4740 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4741 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4742 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4743 /* reset MQD to a clean status */ 4744 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4745 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4746 /* reset ring buffer */ 4747 ring->wptr = 0; 4748 amdgpu_ring_clear_ring(ring); 4749 } else { 4750 amdgpu_ring_clear_ring(ring); 4751 } 4752 return 0; 4753 } 4754 4755 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) 4756 { 4757 if (adev->asic_type > CHIP_TONGA) { 4758 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); 4759 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); 4760 } 4761 /* enable doorbells */ 4762 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4763 } 4764 4765 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) 4766 { 4767 struct amdgpu_ring *ring; 4768 int r; 4769 4770 ring = &adev->gfx.kiq.ring; 4771 4772 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4773 if (unlikely(r != 0)) 4774 return r; 4775 4776 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4777 if (unlikely(r != 0)) 4778 return r; 4779 4780 gfx_v8_0_kiq_init_queue(ring); 4781 amdgpu_bo_kunmap(ring->mqd_obj); 4782 ring->mqd_ptr = NULL; 4783 amdgpu_bo_unreserve(ring->mqd_obj); 4784 ring->ready = true; 4785 return 0; 4786 } 4787 4788 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev) 4789 { 4790 struct amdgpu_ring *ring = NULL; 4791 int r = 0, i; 4792 4793 gfx_v8_0_cp_compute_enable(adev, true); 4794 4795 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4796 ring = &adev->gfx.compute_ring[i]; 4797 4798 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4799 if (unlikely(r != 0)) 4800 goto done; 4801 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4802 if (!r) { 4803 r = gfx_v8_0_kcq_init_queue(ring); 4804 amdgpu_bo_kunmap(ring->mqd_obj); 4805 ring->mqd_ptr = NULL; 4806 } 4807 amdgpu_bo_unreserve(ring->mqd_obj); 4808 if (r) 4809 goto done; 4810 } 4811 4812 gfx_v8_0_set_mec_doorbell_range(adev); 4813 4814 r = gfx_v8_0_kiq_kcq_enable(adev); 4815 if (r) 4816 goto done; 4817 4818 /* Test KCQs */ 4819 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4820 ring = &adev->gfx.compute_ring[i]; 4821 ring->ready = true; 4822 r = amdgpu_ring_test_ring(ring); 4823 if (r) 4824 ring->ready = false; 4825 } 4826 4827 done: 4828 return r; 4829 } 4830 4831 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 4832 { 4833 int r; 4834 4835 if (!(adev->flags & AMD_IS_APU)) 4836 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4837 4838 r = 
gfx_v8_0_kiq_resume(adev);
4839 if (r)
4840 return r;
4841
4842 r = gfx_v8_0_cp_gfx_resume(adev);
4843 if (r)
4844 return r;
4845
4846 r = gfx_v8_0_kcq_resume(adev);
4847 if (r)
4848 return r;
4849 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4850
4851 return 0;
4852 }
4853
4854 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4855 {
4856 gfx_v8_0_cp_gfx_enable(adev, enable);
4857 gfx_v8_0_cp_compute_enable(adev, enable);
4858 }
4859
4860 static int gfx_v8_0_hw_init(void *handle)
4861 {
4862 int r;
4863 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4864
4865 gfx_v8_0_init_golden_registers(adev);
4866 gfx_v8_0_constants_init(adev);
4867
4868 r = gfx_v8_0_rlc_resume(adev);
4869 if (r)
4870 return r;
4871
4872 r = gfx_v8_0_cp_resume(adev);
4873
4874 return r;
4875 }
4876
4877 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4878 {
4879 int r, i;
4880 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4881
4882 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4883 if (r)
4884 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4885
4886 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4887 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4888
4889 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4890 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4891 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4892 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4893 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4894 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4895 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4896 amdgpu_ring_write(kiq_ring, 0);
4897 amdgpu_ring_write(kiq_ring, 0);
4898 amdgpu_ring_write(kiq_ring, 0);
4899 }
4900 r = amdgpu_ring_test_ring(kiq_ring);
4901 if (r)
4902 DRM_ERROR("KCQ disable failed\n");
4903
4904 return r;
4905 }
4906
4907 static bool gfx_v8_0_is_idle(void *handle)
4908 {
4909 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4910
4911 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4912 || RREG32(mmGRBM_STATUS2) != 0x8)
4913 return false;
4914 else
4915 return true;
4916 }
4917
4918 static bool gfx_v8_0_rlc_is_idle(void *handle)
4919 {
4920 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4921
4922 if (RREG32(mmGRBM_STATUS2) != 0x8)
4923 return false;
4924 else
4925 return true;
4926 }
4927
4928 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4929 {
4930 unsigned int i;
4931 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4932
4933 for (i = 0; i < adev->usec_timeout; i++) {
4934 if (gfx_v8_0_rlc_is_idle(handle))
4935 return 0;
4936
4937 udelay(1);
4938 }
4939 return -ETIMEDOUT;
4940 }
4941
4942 static int gfx_v8_0_wait_for_idle(void *handle)
4943 {
4944 unsigned int i;
4945 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4946
4947 for (i = 0; i < adev->usec_timeout; i++) {
4948 if (gfx_v8_0_is_idle(handle))
4949 return 0;
4950
4951 udelay(1);
4952 }
4953 return -ETIMEDOUT;
4954 }
4955
4956 static int gfx_v8_0_hw_fini(void *handle)
4957 {
4958 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4959
4960 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4961 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4962
4963 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4964
4965 amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4966
4967 /* disable KCQs to prevent the CPC from touching memory that may no longer be valid */
4968 gfx_v8_0_kcq_disable(adev);
4969
4970 if (amdgpu_sriov_vf(adev)) {
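/*
 * Under SR-IOV the guest must not halt CP/RLC itself; engine
 * teardown is assumed to be owned by the host side, so bail out
 * early here.
 */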
4971 pr_debug("For SRIOV client, nothing to do here.\n");
4972 return 0;
4973 }
4974 adev->gfx.rlc.funcs->enter_safe_mode(adev);
4975 if (!gfx_v8_0_wait_for_idle(adev))
4976 gfx_v8_0_cp_enable(adev, false);
4977 else
4978 pr_err("CP is busy, skipping CP halt\n");
4979 if (!gfx_v8_0_wait_for_rlc_idle(adev))
4980 gfx_v8_0_rlc_stop(adev);
4981 else
4982 pr_err("RLC is busy, skipping RLC halt\n");
4983 adev->gfx.rlc.funcs->exit_safe_mode(adev);
4984 return 0;
4985 }
4986
4987 static int gfx_v8_0_suspend(void *handle)
4988 {
4989 return gfx_v8_0_hw_fini(handle);
4990 }
4991
4992 static int gfx_v8_0_resume(void *handle)
4993 {
4994 return gfx_v8_0_hw_init(handle);
4995 }
4996
4997 static bool gfx_v8_0_check_soft_reset(void *handle)
4998 {
4999 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5000 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5001 u32 tmp;
5002
5003 /* GRBM_STATUS */
5004 tmp = RREG32(mmGRBM_STATUS);
5005 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5006 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5007 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5008 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5009 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5010 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5011 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5012 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5013 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5014 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5015 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5016 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5017 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5018 }
5019
5020 /* GRBM_STATUS2 */
5021 tmp = RREG32(mmGRBM_STATUS2);
5022 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5023 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5024 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5025
5026 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5027 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5028 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5029 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5030 SOFT_RESET_CPF, 1);
5031 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5032 SOFT_RESET_CPC, 1);
5033 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5034 SOFT_RESET_CPG, 1);
5035 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5036 SOFT_RESET_GRBM, 1);
5037 }
5038
5039 /* SRBM_STATUS */
5040 tmp = RREG32(mmSRBM_STATUS);
5041 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5042 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5043 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5044 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5045 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5046 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5047
5048 if (grbm_soft_reset || srbm_soft_reset) {
5049 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5050 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5051 return true;
5052 } else {
5053 adev->gfx.grbm_soft_reset = 0;
5054 adev->gfx.srbm_soft_reset = 0;
5055 return false;
5056 }
5057 }
5058
5059 static int gfx_v8_0_pre_soft_reset(void *handle)
5060 {
5061 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5062 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5063
5064 if ((!adev->gfx.grbm_soft_reset) &&
5065 (!adev->gfx.srbm_soft_reset))
5066 return 0;
5067
5068 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5069 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5070
5071 /* stop the rlc */
5072 gfx_v8_0_rlc_stop(adev);
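/*
 * With the RLC stopped, quiesce whichever CP blocks the pending reset
 * implicates before the reset bits are actually toggled: halt GFX
 * parsing/prefetching when CP/GFX is flagged, and for the compute
 * blocks (CPF/CPC/CPG) dequeue every compute HQD (dequeue request
 * code 2) before halting the MEC.
 */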
5073 5074 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5075 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5076 /* Disable GFX parsing/prefetching */ 5077 gfx_v8_0_cp_gfx_enable(adev, false); 5078 5079 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5080 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5081 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5082 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5083 int i; 5084 5085 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5086 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5087 5088 mutex_lock(&adev->srbm_mutex); 5089 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5090 gfx_v8_0_deactivate_hqd(adev, 2); 5091 vi_srbm_select(adev, 0, 0, 0, 0); 5092 mutex_unlock(&adev->srbm_mutex); 5093 } 5094 /* Disable MEC parsing/prefetching */ 5095 gfx_v8_0_cp_compute_enable(adev, false); 5096 } 5097 5098 return 0; 5099 } 5100 5101 static int gfx_v8_0_soft_reset(void *handle) 5102 { 5103 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5104 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5105 u32 tmp; 5106 5107 if ((!adev->gfx.grbm_soft_reset) && 5108 (!adev->gfx.srbm_soft_reset)) 5109 return 0; 5110 5111 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5112 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5113 5114 if (grbm_soft_reset || srbm_soft_reset) { 5115 tmp = RREG32(mmGMCON_DEBUG); 5116 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5117 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5118 WREG32(mmGMCON_DEBUG, tmp); 5119 udelay(50); 5120 } 5121 5122 if (grbm_soft_reset) { 5123 tmp = RREG32(mmGRBM_SOFT_RESET); 5124 tmp |= grbm_soft_reset; 5125 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5126 WREG32(mmGRBM_SOFT_RESET, tmp); 5127 tmp = RREG32(mmGRBM_SOFT_RESET); 5128 5129 udelay(50); 5130 5131 tmp &= ~grbm_soft_reset; 5132 WREG32(mmGRBM_SOFT_RESET, tmp); 5133 tmp = RREG32(mmGRBM_SOFT_RESET); 5134 } 5135 5136 if (srbm_soft_reset) { 5137 tmp = RREG32(mmSRBM_SOFT_RESET); 5138 tmp |= srbm_soft_reset; 5139 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5140 WREG32(mmSRBM_SOFT_RESET, tmp); 5141 tmp = RREG32(mmSRBM_SOFT_RESET); 5142 5143 udelay(50); 5144 5145 tmp &= ~srbm_soft_reset; 5146 WREG32(mmSRBM_SOFT_RESET, tmp); 5147 tmp = RREG32(mmSRBM_SOFT_RESET); 5148 } 5149 5150 if (grbm_soft_reset || srbm_soft_reset) { 5151 tmp = RREG32(mmGMCON_DEBUG); 5152 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5153 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5154 WREG32(mmGMCON_DEBUG, tmp); 5155 } 5156 5157 /* Wait a little for things to settle down */ 5158 udelay(50); 5159 5160 return 0; 5161 } 5162 5163 static int gfx_v8_0_post_soft_reset(void *handle) 5164 { 5165 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5166 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5167 5168 if ((!adev->gfx.grbm_soft_reset) && 5169 (!adev->gfx.srbm_soft_reset)) 5170 return 0; 5171 5172 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5173 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5174 5175 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5176 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5177 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5178 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5179 int i; 5180 5181 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5182 struct amdgpu_ring *ring = 
&adev->gfx.compute_ring[i]; 5183 5184 mutex_lock(&adev->srbm_mutex); 5185 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5186 gfx_v8_0_deactivate_hqd(adev, 2); 5187 vi_srbm_select(adev, 0, 0, 0, 0); 5188 mutex_unlock(&adev->srbm_mutex); 5189 } 5190 gfx_v8_0_kiq_resume(adev); 5191 gfx_v8_0_kcq_resume(adev); 5192 } 5193 5194 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5195 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5196 gfx_v8_0_cp_gfx_resume(adev); 5197 5198 gfx_v8_0_rlc_start(adev); 5199 5200 return 0; 5201 } 5202 5203 /** 5204 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5205 * 5206 * @adev: amdgpu_device pointer 5207 * 5208 * Fetches a GPU clock counter snapshot. 5209 * Returns the 64 bit clock counter snapshot. 5210 */ 5211 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5212 { 5213 uint64_t clock; 5214 5215 mutex_lock(&adev->gfx.gpu_clock_mutex); 5216 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5217 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5218 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5219 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5220 return clock; 5221 } 5222 5223 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5224 uint32_t vmid, 5225 uint32_t gds_base, uint32_t gds_size, 5226 uint32_t gws_base, uint32_t gws_size, 5227 uint32_t oa_base, uint32_t oa_size) 5228 { 5229 /* GDS Base */ 5230 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5231 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5232 WRITE_DATA_DST_SEL(0))); 5233 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5234 amdgpu_ring_write(ring, 0); 5235 amdgpu_ring_write(ring, gds_base); 5236 5237 /* GDS Size */ 5238 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5239 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5240 WRITE_DATA_DST_SEL(0))); 5241 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5242 amdgpu_ring_write(ring, 0); 5243 amdgpu_ring_write(ring, gds_size); 5244 5245 /* GWS */ 5246 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5247 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5248 WRITE_DATA_DST_SEL(0))); 5249 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5250 amdgpu_ring_write(ring, 0); 5251 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5252 5253 /* OA */ 5254 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5255 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5256 WRITE_DATA_DST_SEL(0))); 5257 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5258 amdgpu_ring_write(ring, 0); 5259 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5260 } 5261 5262 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5263 { 5264 WREG32(mmSQ_IND_INDEX, 5265 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5266 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5267 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5268 (SQ_IND_INDEX__FORCE_READ_MASK)); 5269 return RREG32(mmSQ_IND_DATA); 5270 } 5271 5272 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5273 uint32_t wave, uint32_t thread, 5274 uint32_t regno, uint32_t num, uint32_t *out) 5275 { 5276 WREG32(mmSQ_IND_INDEX, 5277 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5278 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5279 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5280 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5281 
(SQ_IND_INDEX__FORCE_READ_MASK) | 5282 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5283 while (num--) 5284 *(out++) = RREG32(mmSQ_IND_DATA); 5285 } 5286 5287 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5288 { 5289 /* type 0 wave data */ 5290 dst[(*no_fields)++] = 0; 5291 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5292 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5293 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 5294 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5295 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5296 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5297 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5298 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5299 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5300 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5301 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5302 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5303 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5304 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5305 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5306 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5307 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5308 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5309 } 5310 5311 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5312 uint32_t wave, uint32_t start, 5313 uint32_t size, uint32_t *dst) 5314 { 5315 wave_read_regs( 5316 adev, simd, wave, 0, 5317 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5318 } 5319 5320 5321 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5322 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5323 .select_se_sh = &gfx_v8_0_select_se_sh, 5324 .read_wave_data = &gfx_v8_0_read_wave_data, 5325 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5326 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q 5327 }; 5328 5329 static int gfx_v8_0_early_init(void *handle) 5330 { 5331 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5332 5333 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5334 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 5335 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5336 gfx_v8_0_set_ring_funcs(adev); 5337 gfx_v8_0_set_irq_funcs(adev); 5338 gfx_v8_0_set_gds_init(adev); 5339 gfx_v8_0_set_rlc_funcs(adev); 5340 5341 return 0; 5342 } 5343 5344 static int gfx_v8_0_late_init(void *handle) 5345 { 5346 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5347 int r; 5348 5349 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5350 if (r) 5351 return r; 5352 5353 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5354 if (r) 5355 return r; 5356 5357 /* requires IBs so do in late init after IB pool is initialized */ 5358 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5359 if (r) 5360 return r; 5361 5362 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 5363 if (r) { 5364 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r); 5365 return r; 5366 } 5367 5368 r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0); 5369 if (r) { 
5370 DRM_ERROR( 5371 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n", 5372 r); 5373 return r; 5374 } 5375 5376 return 0; 5377 } 5378 5379 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5380 bool enable) 5381 { 5382 if (((adev->asic_type == CHIP_POLARIS11) || 5383 (adev->asic_type == CHIP_POLARIS12) || 5384 (adev->asic_type == CHIP_VEGAM)) && 5385 adev->powerplay.pp_funcs->set_powergating_by_smu) 5386 /* Send msg to SMU via Powerplay */ 5387 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable); 5388 5389 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5390 } 5391 5392 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5393 bool enable) 5394 { 5395 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0); 5396 } 5397 5398 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5399 bool enable) 5400 { 5401 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5402 } 5403 5404 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5405 bool enable) 5406 { 5407 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5408 } 5409 5410 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5411 bool enable) 5412 { 5413 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5414 5415 /* Read any GFX register to wake up GFX. */ 5416 if (!enable) 5417 RREG32(mmDB_RENDER_CONTROL); 5418 } 5419 5420 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5421 bool enable) 5422 { 5423 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5424 cz_enable_gfx_cg_power_gating(adev, true); 5425 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5426 cz_enable_gfx_pipeline_power_gating(adev, true); 5427 } else { 5428 cz_enable_gfx_cg_power_gating(adev, false); 5429 cz_enable_gfx_pipeline_power_gating(adev, false); 5430 } 5431 } 5432 5433 static int gfx_v8_0_set_powergating_state(void *handle, 5434 enum amd_powergating_state state) 5435 { 5436 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5437 bool enable = (state == AMD_PG_STATE_GATE); 5438 5439 if (amdgpu_sriov_vf(adev)) 5440 return 0; 5441 5442 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | 5443 AMD_PG_SUPPORT_RLC_SMU_HS | 5444 AMD_PG_SUPPORT_CP | 5445 AMD_PG_SUPPORT_GFX_DMG)) 5446 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5447 switch (adev->asic_type) { 5448 case CHIP_CARRIZO: 5449 case CHIP_STONEY: 5450 5451 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5452 cz_enable_sck_slow_down_on_power_up(adev, true); 5453 cz_enable_sck_slow_down_on_power_down(adev, true); 5454 } else { 5455 cz_enable_sck_slow_down_on_power_up(adev, false); 5456 cz_enable_sck_slow_down_on_power_down(adev, false); 5457 } 5458 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5459 cz_enable_cp_power_gating(adev, true); 5460 else 5461 cz_enable_cp_power_gating(adev, false); 5462 5463 cz_update_gfx_cg_power_gating(adev, enable); 5464 5465 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5466 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5467 else 5468 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5469 5470 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5471 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5472 else 5473 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5474 break; 5475 case CHIP_POLARIS11: 5476 case CHIP_POLARIS12: 5477 case CHIP_VEGAM: 5478 if ((adev->pg_flags & 
AMD_PG_SUPPORT_GFX_SMG) && enable)
5479 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5480 else
5481 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5482
5483 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5484 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5485 else
5486 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5487
5488 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5489 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5490 else
5491 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5492 break;
5493 default:
5494 break;
5495 }
5496 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5497 AMD_PG_SUPPORT_RLC_SMU_HS |
5498 AMD_PG_SUPPORT_CP |
5499 AMD_PG_SUPPORT_GFX_DMG))
5500 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5501 return 0;
5502 }
5503
5504 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5505 {
5506 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5507 int data;
5508
5509 if (amdgpu_sriov_vf(adev))
5510 *flags = 0;
5511
5512 /* AMD_CG_SUPPORT_GFX_MGCG */
5513 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5514 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5515 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5516
5517 /* AMD_CG_SUPPORT_GFX_CGCG */
5518 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5519 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5520 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5521
5522 /* AMD_CG_SUPPORT_GFX_CGLS */
5523 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5524 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5525
5526 /* AMD_CG_SUPPORT_GFX_CGTS */
5527 data = RREG32(mmCGTS_SM_CTRL_REG);
5528 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5529 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5530
5531 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5532 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5533 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5534
5535 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5536 data = RREG32(mmRLC_MEM_SLP_CNTL);
5537 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5538 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5539
5540 /* AMD_CG_SUPPORT_GFX_CP_LS */
5541 data = RREG32(mmCP_MEM_SLP_CNTL);
5542 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5543 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5544 }
5545
5546 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5547 uint32_t reg_addr, uint32_t cmd)
5548 {
5549 uint32_t data;
5550
5551 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5552
5553 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5554 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5555
5556 data = RREG32(mmRLC_SERDES_WR_CTRL);
5557 if (adev->asic_type == CHIP_STONEY)
5558 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5559 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5560 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5561 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5562 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5563 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5564 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5565 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5566 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5567 else
5568 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5569 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5570 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5571 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5572 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5573 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5574 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5575 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5576 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5577 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5578
RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5579 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5580 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5581 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5582 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5583 5584 WREG32(mmRLC_SERDES_WR_CTRL, data); 5585 } 5586 5587 #define MSG_ENTER_RLC_SAFE_MODE 1 5588 #define MSG_EXIT_RLC_SAFE_MODE 0 5589 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5590 #define RLC_GPR_REG2__REQ__SHIFT 0 5591 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5592 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5593 5594 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5595 { 5596 u32 data; 5597 unsigned i; 5598 5599 data = RREG32(mmRLC_CNTL); 5600 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5601 return; 5602 5603 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5604 data |= RLC_SAFE_MODE__CMD_MASK; 5605 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5606 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5607 WREG32(mmRLC_SAFE_MODE, data); 5608 5609 for (i = 0; i < adev->usec_timeout; i++) { 5610 if ((RREG32(mmRLC_GPM_STAT) & 5611 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5612 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5613 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5614 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5615 break; 5616 udelay(1); 5617 } 5618 5619 for (i = 0; i < adev->usec_timeout; i++) { 5620 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5621 break; 5622 udelay(1); 5623 } 5624 adev->gfx.rlc.in_safe_mode = true; 5625 } 5626 } 5627 5628 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5629 { 5630 u32 data = 0; 5631 unsigned i; 5632 5633 data = RREG32(mmRLC_CNTL); 5634 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5635 return; 5636 5637 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5638 if (adev->gfx.rlc.in_safe_mode) { 5639 data |= RLC_SAFE_MODE__CMD_MASK; 5640 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5641 WREG32(mmRLC_SAFE_MODE, data); 5642 adev->gfx.rlc.in_safe_mode = false; 5643 } 5644 } 5645 5646 for (i = 0; i < adev->usec_timeout; i++) { 5647 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5648 break; 5649 udelay(1); 5650 } 5651 } 5652 5653 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5654 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5655 .exit_safe_mode = iceland_exit_rlc_safe_mode 5656 }; 5657 5658 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5659 bool enable) 5660 { 5661 uint32_t temp, data; 5662 5663 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5664 5665 /* It is disabled by HW by default */ 5666 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5667 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5668 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 5669 /* 1 - RLC memory Light sleep */ 5670 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 5671 5672 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 5673 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 5674 } 5675 5676 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5677 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5678 if (adev->flags & AMD_IS_APU) 5679 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5680 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5681 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5682 else 5683 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5684 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5685 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5686 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5687 5688 if (temp != data) 5689 
WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5690
5691 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5692 gfx_v8_0_wait_for_rlc_serdes(adev);
5693
5694 /* 5 - clear mgcg override */
5695 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5696
5697 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5698 /* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */
5699 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5700 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5701 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5702 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5703 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5704 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5705 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5706 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5707 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5708 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5709 if (temp != data)
5710 WREG32(mmCGTS_SM_CTRL_REG, data);
5711 }
5712 udelay(50);
5713
5714 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5715 gfx_v8_0_wait_for_rlc_serdes(adev);
5716 } else {
5717 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5718 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5719 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5720 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5721 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5722 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5723 if (temp != data)
5724 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5725
5726 /* 2 - disable MGLS in RLC */
5727 data = RREG32(mmRLC_MEM_SLP_CNTL);
5728 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5729 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5730 WREG32(mmRLC_MEM_SLP_CNTL, data);
5731 }
5732
5733 /* 3 - disable MGLS in CP */
5734 data = RREG32(mmCP_MEM_SLP_CNTL);
5735 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5736 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5737 WREG32(mmCP_MEM_SLP_CNTL, data);
5738 }
5739
5740 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5741 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5742 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5743 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5744 if (temp != data)
5745 WREG32(mmCGTS_SM_CTRL_REG, data);
5746
5747 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5748 gfx_v8_0_wait_for_rlc_serdes(adev);
5749
5750 /* 6 - set mgcg override */
5751 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5752
5753 udelay(50);
5754
5755 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5756 gfx_v8_0_wait_for_rlc_serdes(adev);
5757 }
5758
5759 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5760 }
5761
5762 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5763 bool enable)
5764 {
5765 uint32_t temp, temp1, data, data1;
5766
5767 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5768
5769 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5770
5771 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5772 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5773 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5774 if (temp1 != data1)
5775 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5776
5777 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5778 gfx_v8_0_wait_for_rlc_serdes(adev);
5779
5780 /* 2 - clear cgcg override */
5781 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5782
5783 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5784 gfx_v8_0_wait_for_rlc_serdes(adev);
5785
5786 /* 3 - write cmd to set CGLS */
5787 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5788
5789 /* 4 - enable cgcg */
5790 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5791
5792 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5793 /* enable cgls */
5794 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5795
5796 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5797 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5798
5799 if (temp1 != data1)
5800 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5801 } else {
5802 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5803 }
5804
5805 if (temp != data)
5806 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5807
5808 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5809 * Cmp_busy/GFX_Idle interrupts
5810 */
5811 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5812 } else {
5813 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5814 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5815
5816 /* TEST CGCG */
5817 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5818 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5819 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5820 if (temp1 != data1)
5821 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5822
5823 /* read gfx register to wake up cgcg */
5824 RREG32(mmCB_CGTT_SCLK_CTRL);
5825 RREG32(mmCB_CGTT_SCLK_CTRL);
5826 RREG32(mmCB_CGTT_SCLK_CTRL);
5827 RREG32(mmCB_CGTT_SCLK_CTRL);
5828
5829 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5830 gfx_v8_0_wait_for_rlc_serdes(adev);
5831
5832 /* write cmd to Set CGCG Override */
5833 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5834
5835 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5836 gfx_v8_0_wait_for_rlc_serdes(adev);
5837
5838 /* write cmd to Clear CGLS */
5839 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5840
5841 /* disable cgcg; cgls should be disabled too.
*/ 5842 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 5843 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5844 if (temp != data) 5845 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5846 /* enable interrupts again for PG */ 5847 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5848 } 5849 5850 gfx_v8_0_wait_for_rlc_serdes(adev); 5851 5852 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5853 } 5854 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5855 bool enable) 5856 { 5857 if (enable) { 5858 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 5859 * === MGCG + MGLS + TS(CG/LS) === 5860 */ 5861 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5862 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5863 } else { 5864 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 5865 * === CGCG + CGLS === 5866 */ 5867 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5868 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5869 } 5870 return 0; 5871 } 5872 5873 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 5874 enum amd_clockgating_state state) 5875 { 5876 uint32_t msg_id, pp_state = 0; 5877 uint32_t pp_support_state = 0; 5878 5879 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 5880 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5881 pp_support_state = PP_STATE_SUPPORT_LS; 5882 pp_state = PP_STATE_LS; 5883 } 5884 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5885 pp_support_state |= PP_STATE_SUPPORT_CG; 5886 pp_state |= PP_STATE_CG; 5887 } 5888 if (state == AMD_CG_STATE_UNGATE) 5889 pp_state = 0; 5890 5891 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5892 PP_BLOCK_GFX_CG, 5893 pp_support_state, 5894 pp_state); 5895 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 5896 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5897 } 5898 5899 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 5900 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5901 pp_support_state = PP_STATE_SUPPORT_LS; 5902 pp_state = PP_STATE_LS; 5903 } 5904 5905 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5906 pp_support_state |= PP_STATE_SUPPORT_CG; 5907 pp_state |= PP_STATE_CG; 5908 } 5909 5910 if (state == AMD_CG_STATE_UNGATE) 5911 pp_state = 0; 5912 5913 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5914 PP_BLOCK_GFX_MG, 5915 pp_support_state, 5916 pp_state); 5917 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 5918 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5919 } 5920 5921 return 0; 5922 } 5923 5924 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 5925 enum amd_clockgating_state state) 5926 { 5927 5928 uint32_t msg_id, pp_state = 0; 5929 uint32_t pp_support_state = 0; 5930 5931 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 5932 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5933 pp_support_state = PP_STATE_SUPPORT_LS; 5934 pp_state = PP_STATE_LS; 5935 } 5936 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5937 pp_support_state |= PP_STATE_SUPPORT_CG; 5938 pp_state |= PP_STATE_CG; 5939 } 5940 if (state == AMD_CG_STATE_UNGATE) 5941 pp_state = 0; 5942 5943 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5944 PP_BLOCK_GFX_CG, 5945 pp_support_state, 5946 pp_state); 5947 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 5948 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5949 } 5950 5951 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 5952 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 5953 pp_support_state = 
PP_STATE_SUPPORT_LS; 5954 pp_state = PP_STATE_LS; 5955 } 5956 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 5957 pp_support_state |= PP_STATE_SUPPORT_CG; 5958 pp_state |= PP_STATE_CG; 5959 } 5960 if (state == AMD_CG_STATE_UNGATE) 5961 pp_state = 0; 5962 5963 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5964 PP_BLOCK_GFX_3D, 5965 pp_support_state, 5966 pp_state); 5967 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 5968 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5969 } 5970 5971 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 5972 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5973 pp_support_state = PP_STATE_SUPPORT_LS; 5974 pp_state = PP_STATE_LS; 5975 } 5976 5977 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5978 pp_support_state |= PP_STATE_SUPPORT_CG; 5979 pp_state |= PP_STATE_CG; 5980 } 5981 5982 if (state == AMD_CG_STATE_UNGATE) 5983 pp_state = 0; 5984 5985 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5986 PP_BLOCK_GFX_MG, 5987 pp_support_state, 5988 pp_state); 5989 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 5990 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5991 } 5992 5993 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 5994 pp_support_state = PP_STATE_SUPPORT_LS; 5995 5996 if (state == AMD_CG_STATE_UNGATE) 5997 pp_state = 0; 5998 else 5999 pp_state = PP_STATE_LS; 6000 6001 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6002 PP_BLOCK_GFX_RLC, 6003 pp_support_state, 6004 pp_state); 6005 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6006 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6007 } 6008 6009 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 6010 pp_support_state = PP_STATE_SUPPORT_LS; 6011 6012 if (state == AMD_CG_STATE_UNGATE) 6013 pp_state = 0; 6014 else 6015 pp_state = PP_STATE_LS; 6016 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6017 PP_BLOCK_GFX_CP, 6018 pp_support_state, 6019 pp_state); 6020 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6021 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6022 } 6023 6024 return 0; 6025 } 6026 6027 static int gfx_v8_0_set_clockgating_state(void *handle, 6028 enum amd_clockgating_state state) 6029 { 6030 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6031 6032 if (amdgpu_sriov_vf(adev)) 6033 return 0; 6034 6035 switch (adev->asic_type) { 6036 case CHIP_FIJI: 6037 case CHIP_CARRIZO: 6038 case CHIP_STONEY: 6039 gfx_v8_0_update_gfx_clock_gating(adev, 6040 state == AMD_CG_STATE_GATE); 6041 break; 6042 case CHIP_TONGA: 6043 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6044 break; 6045 case CHIP_POLARIS10: 6046 case CHIP_POLARIS11: 6047 case CHIP_POLARIS12: 6048 case CHIP_VEGAM: 6049 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6050 break; 6051 default: 6052 break; 6053 } 6054 return 0; 6055 } 6056 6057 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6058 { 6059 return ring->adev->wb.wb[ring->rptr_offs]; 6060 } 6061 6062 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6063 { 6064 struct amdgpu_device *adev = ring->adev; 6065 6066 if (ring->use_doorbell) 6067 /* XXX check if swapping is necessary on BE */ 6068 return ring->adev->wb.wb[ring->wptr_offs]; 6069 else 6070 return RREG32(mmCP_RB0_WPTR); 6071 } 6072 6073 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 6074 { 6075 struct amdgpu_device *adev = ring->adev; 6076 6077 if (ring->use_doorbell) { 6078 /* XXX check if swapping is necessary on BE */ 6079 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6080 WDOORBELL32(ring->doorbell_index, 
lower_32_bits(ring->wptr)); 6081 } else { 6082 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 6083 (void)RREG32(mmCP_RB0_WPTR); 6084 } 6085 } 6086 6087 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 6088 { 6089 u32 ref_and_mask, reg_mem_engine; 6090 6091 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || 6092 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { 6093 switch (ring->me) { 6094 case 1: 6095 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6096 break; 6097 case 2: 6098 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6099 break; 6100 default: 6101 return; 6102 } 6103 reg_mem_engine = 0; 6104 } else { 6105 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6106 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6107 } 6108 6109 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6110 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6111 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6112 reg_mem_engine)); 6113 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 6114 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6115 amdgpu_ring_write(ring, ref_and_mask); 6116 amdgpu_ring_write(ring, ref_and_mask); 6117 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6118 } 6119 6120 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) 6121 { 6122 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6123 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) | 6124 EVENT_INDEX(4)); 6125 6126 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6127 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) | 6128 EVENT_INDEX(0)); 6129 } 6130 6131 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6132 struct amdgpu_ib *ib, 6133 unsigned vmid, bool ctx_switch) 6134 { 6135 u32 header, control = 0; 6136 6137 if (ib->flags & AMDGPU_IB_FLAG_CE) 6138 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 6139 else 6140 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 6141 6142 control |= ib->length_dw | (vmid << 24); 6143 6144 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 6145 control |= INDIRECT_BUFFER_PRE_ENB(1); 6146 6147 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 6148 gfx_v8_0_ring_emit_de_meta(ring); 6149 } 6150 6151 amdgpu_ring_write(ring, header); 6152 amdgpu_ring_write(ring, 6153 #ifdef __BIG_ENDIAN 6154 (2 << 0) | 6155 #endif 6156 (ib->gpu_addr & 0xFFFFFFFC)); 6157 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6158 amdgpu_ring_write(ring, control); 6159 } 6160 6161 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 6162 struct amdgpu_ib *ib, 6163 unsigned vmid, bool ctx_switch) 6164 { 6165 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 6166 6167 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 6168 amdgpu_ring_write(ring, 6169 #ifdef __BIG_ENDIAN 6170 (2 << 0) | 6171 #endif 6172 (ib->gpu_addr & 0xFFFFFFFC)); 6173 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6174 amdgpu_ring_write(ring, control); 6175 } 6176 6177 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, 6178 u64 seq, unsigned flags) 6179 { 6180 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6181 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6182 6183 /* EVENT_WRITE_EOP - flush caches, send int */ 6184 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 6185 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6186 EOP_TC_ACTION_EN | 6187 EOP_TC_WB_ACTION_EN | 6188 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6189 

static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0)));   /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
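
/*
 * Compute queue priority uses two knobs: the per-pipe SPI work
 * distribution percentage (dropped to a minimal value for pipes that
 * hold no reservation) and the per-queue HQD priority registers.
 * gfx_v8_0_pipe_reserve_resources() tracks reservations in
 * pipe_reserve_bitmap and restores full percentages to every ring once
 * the last reservation is released.
 */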

static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}

static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
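
/*
 * gfx_v8_0_hqd_set_priority() programs CP_HQD_PIPE_PRIORITY and
 * CP_HQD_QUEUE_PRIORITY for the queue selected via SRBM: both are raised
 * while a high-priority job owns the queue and reset to zero on release.
 */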

static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
					       enum drm_sched_priority priority)
{
	struct amdgpu_device *adev = ring->adev;
	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;

	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return;

	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
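
/*
 * gfx_v8_ring_emit_cntxcntl() emits a CONTEXT_CONTROL packet; the dw2
 * bits select which state blocks the CP (re)loads on the next context
 * switch, as annotated bit by bit below.
 */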

static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time the preamble
		 * is presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	/* number of dwords emitted since the COND_EXEC packet,
	 * accounting for ring wrap-around
	 */
	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register */
			  (5 << 8) |	/* dst: memory */
			  (1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
					      adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
					      adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = 1 << 16; /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32(mmSQ_CMD, value);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
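
/*
 * CP ECC/EDC error reporting has one enable bit per front end: the gfx
 * rings (CP_INT_CNTL*), CPC, and every MEC1/MEC2 pipe.
 * gfx_v8_0_set_cp_ecc_int_state() toggles all of them together.
 */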

static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 unsigned int type,
					 enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 0;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 1;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);

	return 0;
}

static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     unsigned int type,
				     enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 1;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 0;
		break;

	default:
		return -EINVAL;
	}

	/* STALL=1 holds back SQ interrupt messages, so the value written
	 * is the inverse of the requested interrupt state.
	 */
	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
		     enable_flag);

	return 0;
}
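
/*
 * EOP interrupt dispatch: the source ring is identified from
 * entry->ring_id, which packs me in bits 3:2, pipe in bits 1:0 and
 * queue in bits 6:4.
 */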

static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting
			 * from VI.  The interrupt can only be enabled/disabled
			 * per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
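
/*
 * SQ interrupt payloads use three encodings: 0 is the general purpose
 * ("auto") format, 1 an instruction trap and 2 an EDC/ECC error.  The
 * latter two carry wave coordinates (se/sh/cu/simd/wave/vm) and, when
 * SQ_EDC_INFO can be read safely, the EDC error source.
 */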

static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
{
	u32 enc, se_id, sh_id, cu_id;
	char type[20];
	int sq_edc_source = -1;

	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);

	switch (enc) {
	case 0:
		DRM_INFO("SQ general purpose intr detected: "
			 "se_id %d, immed_overflow %d, host_reg_overflow %d, "
			 "host_cmd_overflow %d, cmd_timestamp %d, "
			 "reg_timestamp %d, thread_trace_buff_full %d, "
			 "wlt %d, thread_trace %d.\n",
			 se_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
			 );
		break;
	case 1:
	case 2:
		cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
		sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);

		/*
		 * This function can be called either directly from ISR
		 * or from BH, in which case we can access the SQ_EDC_INFO
		 * instance.
		 */
		if (in_task()) {
			mutex_lock(&adev->grbm_idx_mutex);
			gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);

			sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);

			gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
			mutex_unlock(&adev->grbm_idx_mutex);
		}

		if (enc == 1)
			sprintf(type, "instruction intr");
		else
			sprintf(type, "EDC/ECC error");

		DRM_INFO("SQ %s detected: "
			 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d, "
			 "trap %s, sq_edc_info.source %s.\n",
			 type, se_id, sh_id, cu_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
			 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
			 );
		break;
	default:
		DRM_ERROR("SQ invalid encoding type.\n");
	}
}

static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
	struct sq_work *sq_work = container_of(work, struct sq_work, work);

	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
}

static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
			   struct amdgpu_irq_src *source,
			   struct amdgpu_iv_entry *entry)
{
	unsigned ih_data = entry->src_data[0];

	/*
	 * Try to submit work so SQ_EDC_INFO can be accessed from
	 * BH. If previous work submission hasn't finished yet
	 * just print whatever info is possible directly from the ISR.
	 */
	if (work_pending(&adev->gfx.sq_work.work)) {
		gfx_v8_0_parse_sq_irq(adev, ih_data);
	} else {
		adev->gfx.sq_work.ih_data = ih_data;
		schedule_work(&adev->gfx.sq_work.work);
	}

	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
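
/*
 * Ring function tables.  emit_frame_size is the worst-case number of
 * dwords one frame can add to the ring and is used by the ring layer to
 * reserve space up front; the per-packet contributions are itemized
 * below.
 */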

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
			the first COND_EXEC jumps to the place just
			prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
	       RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
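
/*
 * gfx_v8_0_get_cu_info() walks every SE/SH, applies the user-requested
 * CU disable masks, and records the active-CU bitmap per shader array.
 * The first ao_cu_num active CUs in each array are additionally marked
 * always-on in ao_cu_mask/ao_cu_bitmap.
 */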

static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
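
/*
 * Like the CE metadata above, the DE metadata lives in the per-ring CSA.
 * gfx_v8_0_ring_emit_de_meta() additionally points the payload at a GDS
 * backup area one page past the CSA base before writing the payload out
 * with a WRITE_DATA packet.
 */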

static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}