/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)			((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)			((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)			((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)		((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)			((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)			((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)			((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)		((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)			((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0
/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
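
/*
 * Per-VMID GDS register offsets: the GDS base/size pair plus the GWS and
 * OA registers for each of the 16 VMIDs.
 */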
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
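
/*
 * The golden register tables below (golden_settings_*, *_golden_common_all
 * and *_mgcg_cgcg_init) are flat lists of {register, AND mask, OR value}
 * triplets consumed by amdgpu_device_program_register_sequence(): an AND
 * mask of 0xffffffff writes the OR value directly; otherwise the masked
 * bits are cleared and the OR value is merged into the current contents.
 */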
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
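
/**
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register settings
 * @adev: amdgpu device pointer
 *
 * Applies the clock-gating init and golden register sequences defined above
 * for the detected ASIC. Polaris10 additionally programs CG_ACLK_CNTL and,
 * on a few specific boards, issues AtomBIOS I2C transactions.
 */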
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
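
/**
 * gfx_v8_0_scratch_init - set up the CP scratch register pool
 * @adev: amdgpu device pointer
 *
 * Exposes the eight SCRATCH_REG registers starting at mmSCRATCH_REG0 through
 * adev->gfx.scratch; the ring and IB tests below allocate from this pool.
 */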
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}


static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
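
/**
 * gfx_v8_0_init_microcode - fetch and validate the GFX microcode
 * @adev: amdgpu device pointer
 *
 * Requests the PFP, ME, CE, RLC, MEC and (where applicable) MEC2 firmware
 * images for the detected ASIC, validates their headers and caches the
 * version and feature-version information. On Polaris parts the "_2"
 * images are tried first, falling back to the original names when absent.
 */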
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_VEGAM:
		chip_name = "vegam";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs
	 * was formally released with feature version #46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
		le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the JT (jump table) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}

	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
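
/**
 * cz_init_cp_jump_table - pack the CP jump tables into the RLC table buffer
 * @adev: amdgpu device pointer
 *
 * Copies the jump table (JT) section of each CP microcode image (CE, PFP,
 * ME, MEC and, on Carrizo, MEC2) back to back into the cp_table buffer
 * allocated by gfx_v8_0_rlc_init().
 */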
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
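
	/*
	 * One GFX8_MEC_HPD_SIZE hardware queue descriptor (EOP buffer) is
	 * reserved per acquired compute ring.
	 */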
	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

/*
 * Pre-assembled GCN shader binaries used by gfx_v8_0_do_edc_gpr_workarounds()
 * to write every VGPR/SGPR before ECC (EDC) is enabled.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
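
/*
 * The two SGPR dispatches use complementary COMPUTE_STATIC_THREAD_MGMT_SE0
 * CU masks (0x0f and 0xf0), so between them the SGPR init shader is run on
 * both halves of the CU set.
 */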
0xedcedc03, 1542 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1543 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1544 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1545 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1546 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1547 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1548 }; 1549 1550 static const u32 sgpr2_init_regs[] = 1551 { 1552 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0, 1553 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, 1554 mmCOMPUTE_NUM_THREAD_X, 256*5, 1555 mmCOMPUTE_NUM_THREAD_Y, 1, 1556 mmCOMPUTE_NUM_THREAD_Z, 1, 1557 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */ 1558 mmCOMPUTE_PGM_RSRC2, 20, 1559 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1560 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1561 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1562 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1563 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1564 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1565 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1566 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1567 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1568 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1569 }; 1570 1571 static const u32 sec_ded_counter_registers[] = 1572 { 1573 mmCPC_EDC_ATC_CNT, 1574 mmCPC_EDC_SCRATCH_CNT, 1575 mmCPC_EDC_UCODE_CNT, 1576 mmCPF_EDC_ATC_CNT, 1577 mmCPF_EDC_ROQ_CNT, 1578 mmCPF_EDC_TAG_CNT, 1579 mmCPG_EDC_ATC_CNT, 1580 mmCPG_EDC_DMA_CNT, 1581 mmCPG_EDC_TAG_CNT, 1582 mmDC_EDC_CSINVOC_CNT, 1583 mmDC_EDC_RESTORE_CNT, 1584 mmDC_EDC_STATE_CNT, 1585 mmGDS_EDC_CNT, 1586 mmGDS_EDC_GRBM_CNT, 1587 mmGDS_EDC_OA_DED, 1588 mmSPI_EDC_CNT, 1589 mmSQC_ATC_EDC_GATCL1_CNT, 1590 mmSQC_EDC_CNT, 1591 mmSQ_EDC_DED_CNT, 1592 mmSQ_EDC_INFO, 1593 mmSQ_EDC_SEC_CNT, 1594 mmTCC_EDC_CNT, 1595 mmTCP_ATC_EDC_GATCL1_CNT, 1596 mmTCP_EDC_CNT, 1597 mmTD_EDC_CNT 1598 }; 1599 1600 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 1601 { 1602 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 1603 struct amdgpu_ib ib; 1604 struct dma_fence *f = NULL; 1605 int r, i; 1606 u32 tmp; 1607 unsigned total_size, vgpr_offset, sgpr_offset; 1608 u64 gpu_addr; 1609 1610 /* only supported on CZ */ 1611 if (adev->asic_type != CHIP_CARRIZO) 1612 return 0; 1613 1614 /* bail if the compute ring is not ready */ 1615 if (!ring->ready) 1616 return 0; 1617 1618 tmp = RREG32(mmGB_EDC_MODE); 1619 WREG32(mmGB_EDC_MODE, 0); 1620 1621 total_size = 1622 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1623 total_size += 1624 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1625 total_size += 1626 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1627 total_size = ALIGN(total_size, 256); 1628 vgpr_offset = total_size; 1629 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 1630 sgpr_offset = total_size; 1631 total_size += sizeof(sgpr_init_compute_shader); 1632 1633 /* allocate an indirect buffer to put the commands in */ 1634 memset(&ib, 0, sizeof(ib)); 1635 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 1636 if (r) { 1637 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 1638 return r; 1639 } 1640 1641 /* load the compute shaders */ 1642 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 1643 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 1644 1645 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 1646 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 1647 1648 /* init the ib length to 0 */ 1649 ib.length_dw = 0; 1650 1651 /* VGPR */ 1652 /* write the register state for the compute dispatch */ 1653 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) { 1654 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1655 ib.ptr[ib.length_dw++] = 
vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1656 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1657 }
1658 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1659 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1660 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1661 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1662 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1663 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1664
1665 /* write dispatch packet */
1666 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1667 ib.ptr[ib.length_dw++] = 8; /* x */
1668 ib.ptr[ib.length_dw++] = 1; /* y */
1669 ib.ptr[ib.length_dw++] = 1; /* z */
1670 ib.ptr[ib.length_dw++] =
1671 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1672
1673 /* write CS partial flush packet */
1674 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1675 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1676
1677 /* SGPR1 */
1678 /* write the register state for the compute dispatch */
1679 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1680 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1681 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1682 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1683 }
1684 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1685 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1686 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1687 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1688 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1689 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1690
1691 /* write dispatch packet */
1692 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1693 ib.ptr[ib.length_dw++] = 8; /* x */
1694 ib.ptr[ib.length_dw++] = 1; /* y */
1695 ib.ptr[ib.length_dw++] = 1; /* z */
1696 ib.ptr[ib.length_dw++] =
1697 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1698
1699 /* write CS partial flush packet */
1700 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1701 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1702
1703 /* SGPR2 */
1704 /* write the register state for the compute dispatch */
1705 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1706 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1707 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1708 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1709 }
1710 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1711 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1712 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1713 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1714 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1715 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1716
1717 /* write dispatch packet */
1718 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1719 ib.ptr[ib.length_dw++] = 8; /* x */
1720 ib.ptr[ib.length_dw++] = 1; /* y */
1721 ib.ptr[ib.length_dw++] = 1; /* z */
1722 ib.ptr[ib.length_dw++] =
1723 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1724
1725 /* write CS partial flush packet */
1726 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1727 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1728
1729 /* schedule the ib on the ring */
1730 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1731 if (r) {
1732
DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 1733 goto fail; 1734 } 1735 1736 /* wait for the GPU to finish processing the IB */ 1737 r = dma_fence_wait(f, false); 1738 if (r) { 1739 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 1740 goto fail; 1741 } 1742 1743 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2); 1744 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1); 1745 WREG32(mmGB_EDC_MODE, tmp); 1746 1747 tmp = RREG32(mmCC_GC_EDC_CONFIG); 1748 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1; 1749 WREG32(mmCC_GC_EDC_CONFIG, tmp); 1750 1751 1752 /* read back registers to clear the counters */ 1753 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) 1754 RREG32(sec_ded_counter_registers[i]); 1755 1756 fail: 1757 amdgpu_ib_free(adev, &ib, NULL); 1758 dma_fence_put(f); 1759 1760 return r; 1761 } 1762 1763 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) 1764 { 1765 u32 gb_addr_config; 1766 u32 mc_shared_chmap, mc_arb_ramcfg; 1767 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; 1768 u32 tmp; 1769 int ret; 1770 1771 switch (adev->asic_type) { 1772 case CHIP_TOPAZ: 1773 adev->gfx.config.max_shader_engines = 1; 1774 adev->gfx.config.max_tile_pipes = 2; 1775 adev->gfx.config.max_cu_per_sh = 6; 1776 adev->gfx.config.max_sh_per_se = 1; 1777 adev->gfx.config.max_backends_per_se = 2; 1778 adev->gfx.config.max_texture_channel_caches = 2; 1779 adev->gfx.config.max_gprs = 256; 1780 adev->gfx.config.max_gs_threads = 32; 1781 adev->gfx.config.max_hw_contexts = 8; 1782 1783 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1784 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1785 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1786 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1787 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN; 1788 break; 1789 case CHIP_FIJI: 1790 adev->gfx.config.max_shader_engines = 4; 1791 adev->gfx.config.max_tile_pipes = 16; 1792 adev->gfx.config.max_cu_per_sh = 16; 1793 adev->gfx.config.max_sh_per_se = 1; 1794 adev->gfx.config.max_backends_per_se = 4; 1795 adev->gfx.config.max_texture_channel_caches = 16; 1796 adev->gfx.config.max_gprs = 256; 1797 adev->gfx.config.max_gs_threads = 32; 1798 adev->gfx.config.max_hw_contexts = 8; 1799 1800 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1801 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1802 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1803 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1804 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1805 break; 1806 case CHIP_POLARIS11: 1807 case CHIP_POLARIS12: 1808 ret = amdgpu_atombios_get_gfx_info(adev); 1809 if (ret) 1810 return ret; 1811 adev->gfx.config.max_gprs = 256; 1812 adev->gfx.config.max_gs_threads = 32; 1813 adev->gfx.config.max_hw_contexts = 8; 1814 1815 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1816 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1817 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1818 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1819 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; 1820 break; 1821 case CHIP_POLARIS10: 1822 case CHIP_VEGAM: 1823 ret = amdgpu_atombios_get_gfx_info(adev); 1824 if (ret) 1825 return ret; 1826 adev->gfx.config.max_gprs = 256; 1827 adev->gfx.config.max_gs_threads = 32; 1828 adev->gfx.config.max_hw_contexts = 8; 1829 1830 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1831 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1832 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1833 
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1834 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1835 break; 1836 case CHIP_TONGA: 1837 adev->gfx.config.max_shader_engines = 4; 1838 adev->gfx.config.max_tile_pipes = 8; 1839 adev->gfx.config.max_cu_per_sh = 8; 1840 adev->gfx.config.max_sh_per_se = 1; 1841 adev->gfx.config.max_backends_per_se = 2; 1842 adev->gfx.config.max_texture_channel_caches = 8; 1843 adev->gfx.config.max_gprs = 256; 1844 adev->gfx.config.max_gs_threads = 32; 1845 adev->gfx.config.max_hw_contexts = 8; 1846 1847 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1848 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1849 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1850 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1851 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1852 break; 1853 case CHIP_CARRIZO: 1854 adev->gfx.config.max_shader_engines = 1; 1855 adev->gfx.config.max_tile_pipes = 2; 1856 adev->gfx.config.max_sh_per_se = 1; 1857 adev->gfx.config.max_backends_per_se = 2; 1858 adev->gfx.config.max_cu_per_sh = 8; 1859 adev->gfx.config.max_texture_channel_caches = 2; 1860 adev->gfx.config.max_gprs = 256; 1861 adev->gfx.config.max_gs_threads = 32; 1862 adev->gfx.config.max_hw_contexts = 8; 1863 1864 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1865 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1866 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1867 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1868 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1869 break; 1870 case CHIP_STONEY: 1871 adev->gfx.config.max_shader_engines = 1; 1872 adev->gfx.config.max_tile_pipes = 2; 1873 adev->gfx.config.max_sh_per_se = 1; 1874 adev->gfx.config.max_backends_per_se = 1; 1875 adev->gfx.config.max_cu_per_sh = 3; 1876 adev->gfx.config.max_texture_channel_caches = 2; 1877 adev->gfx.config.max_gprs = 256; 1878 adev->gfx.config.max_gs_threads = 16; 1879 adev->gfx.config.max_hw_contexts = 8; 1880 1881 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1882 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1883 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1884 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1885 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1886 break; 1887 default: 1888 adev->gfx.config.max_shader_engines = 2; 1889 adev->gfx.config.max_tile_pipes = 4; 1890 adev->gfx.config.max_cu_per_sh = 2; 1891 adev->gfx.config.max_sh_per_se = 1; 1892 adev->gfx.config.max_backends_per_se = 2; 1893 adev->gfx.config.max_texture_channel_caches = 4; 1894 adev->gfx.config.max_gprs = 256; 1895 adev->gfx.config.max_gs_threads = 32; 1896 adev->gfx.config.max_hw_contexts = 8; 1897 1898 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1899 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1900 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1901 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1902 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1903 break; 1904 } 1905 1906 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); 1907 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); 1908 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; 1909 1910 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; 1911 adev->gfx.config.mem_max_burst_length_bytes = 256; 1912 if (adev->flags & AMD_IS_APU) { 1913 /* Get memory bank mapping mode. 
*/
1914 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1915 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1916 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1917
1918 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1919 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1920 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1921
1922 /* Validate settings in case only one DIMM is installed. */
1923 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1924 dimm00_addr_map = 0;
1925 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1926 dimm01_addr_map = 0;
1927 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1928 dimm10_addr_map = 0;
1929 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1930 dimm11_addr_map = 0;
1931
1932 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1933 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1934 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1935 adev->gfx.config.mem_row_size_in_kb = 2;
1936 else
1937 adev->gfx.config.mem_row_size_in_kb = 1;
1938 } else {
1939 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1940 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; /* 4 bytes * 2^(8 + NOOFCOLS) columns, in KB */
1941 if (adev->gfx.config.mem_row_size_in_kb > 4)
1942 adev->gfx.config.mem_row_size_in_kb = 4;
1943 }
1944
1945 adev->gfx.config.shader_engine_tile_size = 32;
1946 adev->gfx.config.num_gpus = 1;
1947 adev->gfx.config.multi_gpu_tile_size = 64;
1948
1949 /* fix up row size */
1950 switch (adev->gfx.config.mem_row_size_in_kb) {
1951 case 1:
1952 default:
1953 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1954 break;
1955 case 2:
1956 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1957 break;
1958 case 4:
1959 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1960 break;
1961 }
1962 adev->gfx.config.gb_addr_config = gb_addr_config;
1963
1964 return 0;
1965 }
1966
1967 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1968 int mec, int pipe, int queue)
1969 {
1970 int r;
1971 unsigned irq_type;
1972 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1973
1974
1975
1976 /* mec0 is me1 */
1977 ring->me = mec + 1;
1978 ring->pipe = pipe;
1979 ring->queue = queue;
1980
1981 ring->ring_obj = NULL;
1982 ring->use_doorbell = true;
1983 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1984 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1985 + (ring_id * GFX8_MEC_HPD_SIZE);
1986 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1987
1988 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1989 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1990 + ring->pipe;
1991
1992 /* type-2 packets are deprecated on MEC, use type-3 instead */
1993 r = amdgpu_ring_init(adev, ring, 1024,
1994 &adev->gfx.eop_irq, irq_type);
1995 if (r)
1996 return r;
1997
1998
1999 return 0;
2000 }
2001
2002 static int gfx_v8_0_sw_init(void *handle)
2003 {
2004 int i, j, k, r, ring_id;
2005 struct amdgpu_ring *ring;
2006 struct amdgpu_kiq *kiq;
2007 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2008 2009 switch (adev->asic_type) { 2010 case CHIP_TONGA: 2011 case CHIP_CARRIZO: 2012 case CHIP_FIJI: 2013 case CHIP_POLARIS10: 2014 case CHIP_POLARIS11: 2015 case CHIP_POLARIS12: 2016 case CHIP_VEGAM: 2017 adev->gfx.mec.num_mec = 2; 2018 break; 2019 case CHIP_TOPAZ: 2020 case CHIP_STONEY: 2021 default: 2022 adev->gfx.mec.num_mec = 1; 2023 break; 2024 } 2025 2026 adev->gfx.mec.num_pipe_per_mec = 4; 2027 adev->gfx.mec.num_queue_per_pipe = 8; 2028 2029 /* KIQ event */ 2030 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq); 2031 if (r) 2032 return r; 2033 2034 /* EOP Event */ 2035 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); 2036 if (r) 2037 return r; 2038 2039 /* Privileged reg */ 2040 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, 2041 &adev->gfx.priv_reg_irq); 2042 if (r) 2043 return r; 2044 2045 /* Privileged inst */ 2046 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, 2047 &adev->gfx.priv_inst_irq); 2048 if (r) 2049 return r; 2050 2051 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2052 2053 gfx_v8_0_scratch_init(adev); 2054 2055 r = gfx_v8_0_init_microcode(adev); 2056 if (r) { 2057 DRM_ERROR("Failed to load gfx firmware!\n"); 2058 return r; 2059 } 2060 2061 r = gfx_v8_0_rlc_init(adev); 2062 if (r) { 2063 DRM_ERROR("Failed to init rlc BOs!\n"); 2064 return r; 2065 } 2066 2067 r = gfx_v8_0_mec_init(adev); 2068 if (r) { 2069 DRM_ERROR("Failed to init MEC BOs!\n"); 2070 return r; 2071 } 2072 2073 /* set up the gfx ring */ 2074 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2075 ring = &adev->gfx.gfx_ring[i]; 2076 ring->ring_obj = NULL; 2077 sprintf(ring->name, "gfx"); 2078 /* no gfx doorbells on iceland */ 2079 if (adev->asic_type != CHIP_TOPAZ) { 2080 ring->use_doorbell = true; 2081 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; 2082 } 2083 2084 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2085 AMDGPU_CP_IRQ_GFX_EOP); 2086 if (r) 2087 return r; 2088 } 2089 2090 2091 /* set up the compute queues - allocate horizontally across pipes */ 2092 ring_id = 0; 2093 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2094 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2095 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2096 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2097 continue; 2098 2099 r = gfx_v8_0_compute_ring_init(adev, 2100 ring_id, 2101 i, k, j); 2102 if (r) 2103 return r; 2104 2105 ring_id++; 2106 } 2107 } 2108 } 2109 2110 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE); 2111 if (r) { 2112 DRM_ERROR("Failed to init KIQ BOs!\n"); 2113 return r; 2114 } 2115 2116 kiq = &adev->gfx.kiq; 2117 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2118 if (r) 2119 return r; 2120 2121 /* create MQD for all compute queues as well as KIQ for SRIOV case */ 2122 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); 2123 if (r) 2124 return r; 2125 2126 /* reserve GDS, GWS and OA resource for gfx */ 2127 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, 2128 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, 2129 &adev->gds.gds_gfx_bo, NULL, NULL); 2130 if (r) 2131 return r; 2132 2133 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, 2134 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, 2135 &adev->gds.gws_gfx_bo, NULL, NULL); 2136 if (r) 2137 return r; 2138 2139 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, 2140 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, 
2141 &adev->gds.oa_gfx_bo, NULL, NULL); 2142 if (r) 2143 return r; 2144 2145 adev->gfx.ce_ram_size = 0x8000; 2146 2147 r = gfx_v8_0_gpu_early_init(adev); 2148 if (r) 2149 return r; 2150 2151 return 0; 2152 } 2153 2154 static int gfx_v8_0_sw_fini(void *handle) 2155 { 2156 int i; 2157 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2158 2159 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); 2160 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); 2161 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); 2162 2163 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2164 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2165 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2166 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2167 2168 amdgpu_gfx_compute_mqd_sw_fini(adev); 2169 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 2170 amdgpu_gfx_kiq_fini(adev); 2171 2172 gfx_v8_0_mec_fini(adev); 2173 gfx_v8_0_rlc_fini(adev); 2174 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 2175 &adev->gfx.rlc.clear_state_gpu_addr, 2176 (void **)&adev->gfx.rlc.cs_ptr); 2177 if ((adev->asic_type == CHIP_CARRIZO) || 2178 (adev->asic_type == CHIP_STONEY)) { 2179 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2180 &adev->gfx.rlc.cp_table_gpu_addr, 2181 (void **)&adev->gfx.rlc.cp_table_ptr); 2182 } 2183 gfx_v8_0_free_microcode(adev); 2184 2185 return 0; 2186 } 2187 2188 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) 2189 { 2190 uint32_t *modearray, *mod2array; 2191 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); 2192 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 2193 u32 reg_offset; 2194 2195 modearray = adev->gfx.config.tile_mode_array; 2196 mod2array = adev->gfx.config.macrotile_mode_array; 2197 2198 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2199 modearray[reg_offset] = 0; 2200 2201 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2202 mod2array[reg_offset] = 0; 2203 2204 switch (adev->asic_type) { 2205 case CHIP_TOPAZ: 2206 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2207 PIPE_CONFIG(ADDR_SURF_P2) | 2208 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2209 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2210 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2211 PIPE_CONFIG(ADDR_SURF_P2) | 2212 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2213 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2214 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2215 PIPE_CONFIG(ADDR_SURF_P2) | 2216 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2217 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2218 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2219 PIPE_CONFIG(ADDR_SURF_P2) | 2220 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2221 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2222 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2223 PIPE_CONFIG(ADDR_SURF_P2) | 2224 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2225 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2226 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2227 PIPE_CONFIG(ADDR_SURF_P2) | 2228 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2230 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2231 PIPE_CONFIG(ADDR_SURF_P2) | 2232 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2233 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2234 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2235 
PIPE_CONFIG(ADDR_SURF_P2)); 2236 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2237 PIPE_CONFIG(ADDR_SURF_P2) | 2238 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2239 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2240 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2241 PIPE_CONFIG(ADDR_SURF_P2) | 2242 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2243 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2244 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2245 PIPE_CONFIG(ADDR_SURF_P2) | 2246 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2248 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2249 PIPE_CONFIG(ADDR_SURF_P2) | 2250 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2251 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2252 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2253 PIPE_CONFIG(ADDR_SURF_P2) | 2254 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2255 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2256 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2257 PIPE_CONFIG(ADDR_SURF_P2) | 2258 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2260 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2261 PIPE_CONFIG(ADDR_SURF_P2) | 2262 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2264 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2265 PIPE_CONFIG(ADDR_SURF_P2) | 2266 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2268 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2269 PIPE_CONFIG(ADDR_SURF_P2) | 2270 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2271 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2272 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2273 PIPE_CONFIG(ADDR_SURF_P2) | 2274 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2276 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2277 PIPE_CONFIG(ADDR_SURF_P2) | 2278 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2279 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2280 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2281 PIPE_CONFIG(ADDR_SURF_P2) | 2282 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2284 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2285 PIPE_CONFIG(ADDR_SURF_P2) | 2286 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2288 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2289 PIPE_CONFIG(ADDR_SURF_P2) | 2290 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2291 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2292 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2293 PIPE_CONFIG(ADDR_SURF_P2) | 2294 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2296 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2297 PIPE_CONFIG(ADDR_SURF_P2) | 2298 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2299 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2300 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2301 PIPE_CONFIG(ADDR_SURF_P2) | 2302 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2303 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2304 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2305 PIPE_CONFIG(ADDR_SURF_P2) | 2306 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2308 2309 mod2array[0] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2310 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2311 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2312 NUM_BANKS(ADDR_SURF_8_BANK)); 2313 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2314 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2315 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2316 NUM_BANKS(ADDR_SURF_8_BANK)); 2317 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2318 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2319 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2320 NUM_BANKS(ADDR_SURF_8_BANK)); 2321 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2322 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2323 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2324 NUM_BANKS(ADDR_SURF_8_BANK)); 2325 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2326 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2327 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2328 NUM_BANKS(ADDR_SURF_8_BANK)); 2329 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2330 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2331 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2332 NUM_BANKS(ADDR_SURF_8_BANK)); 2333 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2334 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2335 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2336 NUM_BANKS(ADDR_SURF_8_BANK)); 2337 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2338 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2339 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2340 NUM_BANKS(ADDR_SURF_16_BANK)); 2341 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2342 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2343 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2344 NUM_BANKS(ADDR_SURF_16_BANK)); 2345 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2348 NUM_BANKS(ADDR_SURF_16_BANK)); 2349 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2352 NUM_BANKS(ADDR_SURF_16_BANK)); 2353 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2356 NUM_BANKS(ADDR_SURF_16_BANK)); 2357 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2360 NUM_BANKS(ADDR_SURF_16_BANK)); 2361 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2364 NUM_BANKS(ADDR_SURF_8_BANK)); 2365 2366 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2367 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2368 reg_offset != 23) 2369 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2370 2371 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2372 if (reg_offset != 7) 2373 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2374 2375 break; 2376 case CHIP_FIJI: 2377 case CHIP_VEGAM: 2378 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2379 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2380 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2381 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2382 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2383 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2384 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2385 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2386 modearray[2] = 
(ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2387 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2388 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2389 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2390 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2391 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2392 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2393 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2394 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2395 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2396 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2397 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2398 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2399 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2400 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2401 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2402 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2403 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2404 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2405 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2406 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2407 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2408 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2409 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2410 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2411 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2412 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2413 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2414 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2415 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2416 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2417 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2418 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2419 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2420 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2421 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2422 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2423 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2424 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2425 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2426 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2427 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2428 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2429 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2430 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2431 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2432 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2433 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2434 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2435 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2436 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2437 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2438 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2439 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2440 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2441 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2442 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2444 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2445 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2446 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2447 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2448 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2449 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2450 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2451 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2452 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2453 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2454 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2455 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 
2456 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2457 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2458 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2459 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2460 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2461 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2462 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2463 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2464 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2465 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2466 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2468 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2469 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2470 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2472 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2473 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2474 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2476 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2477 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2478 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2480 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2481 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2482 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2484 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2485 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2486 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2488 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2489 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2490 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2492 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2493 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2494 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2496 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2497 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2498 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2500 2501 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2502 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2503 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2504 NUM_BANKS(ADDR_SURF_8_BANK)); 2505 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2506 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2507 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2508 NUM_BANKS(ADDR_SURF_8_BANK)); 2509 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2510 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2511 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2512 NUM_BANKS(ADDR_SURF_8_BANK)); 2513 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2514 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2515 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2516 NUM_BANKS(ADDR_SURF_8_BANK)); 2517 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2520 NUM_BANKS(ADDR_SURF_8_BANK)); 2521 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2522 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2523 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2524 NUM_BANKS(ADDR_SURF_8_BANK)); 2525 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2526 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2527 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 
2528 NUM_BANKS(ADDR_SURF_8_BANK)); 2529 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2530 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2531 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2532 NUM_BANKS(ADDR_SURF_8_BANK)); 2533 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2534 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2535 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2536 NUM_BANKS(ADDR_SURF_8_BANK)); 2537 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2538 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2539 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2540 NUM_BANKS(ADDR_SURF_8_BANK)); 2541 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2542 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2543 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2544 NUM_BANKS(ADDR_SURF_8_BANK)); 2545 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2546 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2547 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2548 NUM_BANKS(ADDR_SURF_8_BANK)); 2549 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2550 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2551 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2552 NUM_BANKS(ADDR_SURF_8_BANK)); 2553 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2554 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2555 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2556 NUM_BANKS(ADDR_SURF_4_BANK)); 2557 2558 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2559 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2560 2561 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2562 if (reg_offset != 7) 2563 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2564 2565 break; 2566 case CHIP_TONGA: 2567 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2568 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2569 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2570 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2571 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2572 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2573 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2574 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2575 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2576 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2577 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2578 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2579 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2580 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2581 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2582 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2583 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2584 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2585 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2586 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2587 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2588 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2589 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2590 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2591 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2592 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2593 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2594 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2595 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2596 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2597 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2598 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2599 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2600 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2601 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2602 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2603 
MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2604 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2605 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2606 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2607 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2608 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2609 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2610 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2611 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2612 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2613 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2614 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2615 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2616 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2617 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2618 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2619 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2620 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2621 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2622 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2623 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2624 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2625 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2626 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2627 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2628 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2629 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2630 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2631 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2632 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2633 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2634 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2635 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2636 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2637 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2638 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2639 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2640 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2641 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2642 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2643 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2644 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2645 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2646 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2647 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2648 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2649 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2650 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2651 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2652 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2653 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2654 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2655 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2656 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2657 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2658 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2659 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2660 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2661 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2662 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2663 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2664 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2665 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2666 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2667 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2668 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2669 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2670 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2671 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2672 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2673 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2674 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2675 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2676 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2677 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2678 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2679 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2680 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2681 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2682 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2683 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2684 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2685 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2686 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2687 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2688 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2689 2690 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2691 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2692 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2693 NUM_BANKS(ADDR_SURF_16_BANK)); 2694 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2695 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2696 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2697 NUM_BANKS(ADDR_SURF_16_BANK)); 2698 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2699 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2700 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2701 NUM_BANKS(ADDR_SURF_16_BANK)); 2702 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2703 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2704 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2705 NUM_BANKS(ADDR_SURF_16_BANK)); 2706 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2707 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2708 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2709 NUM_BANKS(ADDR_SURF_16_BANK)); 2710 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2711 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2712 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2713 NUM_BANKS(ADDR_SURF_16_BANK)); 2714 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2715 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2716 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2717 NUM_BANKS(ADDR_SURF_16_BANK)); 2718 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2719 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2720 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2721 NUM_BANKS(ADDR_SURF_16_BANK)); 2722 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2723 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2724 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2725 NUM_BANKS(ADDR_SURF_16_BANK)); 2726 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2727 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2728 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2729 NUM_BANKS(ADDR_SURF_16_BANK)); 2730 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2731 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2732 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2733 NUM_BANKS(ADDR_SURF_16_BANK)); 2734 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2735 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2736 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2737 NUM_BANKS(ADDR_SURF_8_BANK)); 2738 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2739 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2740 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2741 NUM_BANKS(ADDR_SURF_4_BANK)); 2742 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2743 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2744 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2745 NUM_BANKS(ADDR_SURF_4_BANK)); 2746 2747 for (reg_offset = 0; 
reg_offset < num_tile_mode_states; reg_offset++) 2748 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2749 2750 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2751 if (reg_offset != 7) 2752 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2753 2754 break; 2755 case CHIP_POLARIS11: 2756 case CHIP_POLARIS12: 2757 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2758 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2759 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2760 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2761 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2762 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2763 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2764 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2765 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2766 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2767 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2768 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2769 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2770 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2771 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2772 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2773 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2774 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2775 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2776 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2777 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2778 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2779 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2780 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2781 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2782 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2783 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2784 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2785 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2786 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2787 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2788 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2789 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2790 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2791 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2792 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2793 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2794 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2795 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2796 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2797 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2798 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2799 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2800 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2801 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2802 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2803 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2804 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2805 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2806 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2807 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2808 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2809 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2810 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2811 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2812 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2813 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2814 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2815 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2816 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2817 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2818 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2819 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2820 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2821 
MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2822 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2823 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2824 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2825 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2826 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2827 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2828 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2829 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2830 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2831 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2832 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2833 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2834 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2835 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2836 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2837 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2838 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2839 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2840 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2841 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2842 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2843 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2844 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2845 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2846 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2847 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2848 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2849 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2850 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2851 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2852 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2853 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2854 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2855 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2856 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2857 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2859 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2860 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2861 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2863 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2864 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2865 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2866 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2867 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2868 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2869 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2870 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2871 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2872 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2873 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2875 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2876 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2877 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2879 2880 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2881 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2882 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2883 NUM_BANKS(ADDR_SURF_16_BANK)); 2884 2885 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2886 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2887 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2888 NUM_BANKS(ADDR_SURF_16_BANK)); 2889 2890 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2891 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2892 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2893 NUM_BANKS(ADDR_SURF_16_BANK)); 2894 2895 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2896 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2897 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2898 NUM_BANKS(ADDR_SURF_16_BANK)); 2899 2900 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2901 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2902 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2903 NUM_BANKS(ADDR_SURF_16_BANK)); 2904 2905 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2906 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2907 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2908 NUM_BANKS(ADDR_SURF_16_BANK)); 2909 2910 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2911 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2912 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2913 NUM_BANKS(ADDR_SURF_16_BANK)); 2914 2915 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2916 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2917 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2918 NUM_BANKS(ADDR_SURF_16_BANK)); 2919 2920 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2921 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2922 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2923 NUM_BANKS(ADDR_SURF_16_BANK)); 2924 2925 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2926 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2927 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2928 NUM_BANKS(ADDR_SURF_16_BANK)); 2929 2930 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2931 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2932 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2933 NUM_BANKS(ADDR_SURF_16_BANK)); 2934 2935 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2936 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2937 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2938 NUM_BANKS(ADDR_SURF_16_BANK)); 2939 2940 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2941 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2942 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2943 NUM_BANKS(ADDR_SURF_8_BANK)); 2944 2945 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2946 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2947 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2948 NUM_BANKS(ADDR_SURF_4_BANK)); 2949 2950 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2951 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2952 2953 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2954 if (reg_offset != 7) 2955 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2956 2957 break; 2958 case CHIP_POLARIS10: 2959 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2960 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2961 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2962 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2963 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2964 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2965 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2966 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2967 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2968 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2969 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2970 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2971 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2972 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2973 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2974 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2975 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2976 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2977 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2978 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2979 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2980 
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_STONEY:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	default:
		dev_warn(adev->dev,
			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
			 adev->asic_type);
		/* fall through */

	case CHIP_CARRIZO:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	}
}
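
/*
 * GRBM_GFX_INDEX steers subsequent GRBM register accesses to one shader
 * engine (SE), one shader array (SH) and one instance, or broadcasts
 * them.  Passing 0xffffffff for a parameter selects the corresponding
 * *_BROADCAST_WRITES mode instead of a specific index.
 */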
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
				  u32 se_num, u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32(mmGRBM_GFX_INDEX, data);
}

static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q)
{
	vi_srbm_select(adev, me, pipe, q, 0);
}

static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_RB_BACKEND_DISABLE) |
	       RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}

static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_VEGAM:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
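
/*
 * On harvested parts some render backends (RBs) are fused off, so the
 * default raster configuration would route work to dead RBs.  The helper
 * below rewrites the SE/PKR/RB mapping fields per shader engine so that
 * only RBs present in rb_mask are referenced, then restores broadcast
 * mode on GRBM_GFX_INDEX.
 */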
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
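
/*
 * The active-RB bitmap is assembled SH by SH: each
 * gfx_v8_0_get_rb_active_bitmap() result is rb_bitmap_width_per_sh bits
 * wide and is shifted into its (se, sh) slot.  For example, with 4 SEs,
 * 1 SH/SE and 2 RBs per SH, SE2's two bits land at bit 4 of active_rbs.
 */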
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
	    adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
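
/*
 * SH_MEM_BASES packs two 16-bit aperture selectors: private base in the
 * low half, shared base in the high half.  A selector holds the top 16
 * bits of the 64-bit aperture address, so the 0x6000 value used below
 * corresponds to an aperture at 0x6000'0000'0000'0000, matching the
 * LDS/scratch/GPUVM layout described in gfx_v8_0_init_compute_vmid().
 */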
/**
 * gfx_v8_0_init_compute_vmid - init compute vmid
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	default:
		adev->gfx.config.double_offchip_lds_buf = 1;
		break;
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		adev->gfx.config.double_offchip_lds_buf = 0;
		break;
	}
}

static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
					  SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * make sure the following register writes are broadcast
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);
}
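
/*
 * Poll until the RLC serdes report idle: first the per-CU master busy
 * bits for every SE/SH combination, then the non-CU masters (SE, GC,
 * TC0/TC1).  Each poll gives up after adev->usec_timeout microseconds.
 */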
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}

static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
	       adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
	       adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
	       adev->gfx.rlc.clear_state_size);
}

static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}
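
/*
 * The RLC register-list image is a sequence of entries terminated by
 * 0xFFFFFFFF markers.  gfx_v8_0_parse_ind_reg_list() records where each
 * entry starts and deduplicates the index registers it references; the
 * function below then uploads the direct list to the SRM ARAM and the
 * indirect list plus starting offsets to the GPM scratch area.
 */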
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
	       adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
	       adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
		       indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}

static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}

static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
}

static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
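
/*
 * Power-gating init is ASIC dependent: Carrizo/Stoney additionally need
 * the RLC jump table and the always-on CU mask programmed, while
 * Polaris11/12 and VegaM only set up the clear-state buffer, the
 * save/restore machine and the PG delays.
 */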
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12) ||
		   (adev->asic_type == CHIP_VEGAM)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}
}

static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* APUs (e.g. Carrizo) enable the CP interrupt only after the CP is initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}

static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
				   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12 ||
	    adev->asic_type == CHIP_VEGAM) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy rlc firmware loading */
		r = gfx_v8_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}

static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}
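
/*
 * All CP microcode uploads below follow the same pattern: reset the
 * ucode address register to 0, stream the firmware words through the
 * matching data register, then write the firmware version back to the
 * address register to conclude the upload.
 */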
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}

static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
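
/*
 * gfx_v8_0_cp_gfx_start() emits the initial clear state on the gfx
 * ring: a PACKET3 stream sized by gfx_v8_0_get_csb_size() above,
 * bracketed by PREAMBLE begin/end markers, carrying the context
 * registers from vi_cs_data, the raster configs and the CE partition
 * bases.
 */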
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}

static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;

	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			    DOORBELL_RANGE_LOWER,
			    AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
	       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
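
/*
 * Ring bring-up order below: program CP_RB0_CNTL with the buffer and
 * block sizes, latch the read/write pointers while RB_RPTR_WR_ENA is
 * set, point the CP at the rptr/wptr write-back slots in the wb buffer,
 * then set the ring base and doorbell before starting the ring.
 */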
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}

static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	udelay(50);
}
*/ 4531 if (adev->gfx.mec2_fw) { 4532 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4533 4534 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4535 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4536 4537 fw_data = (const __le32 *) 4538 (adev->gfx.mec2_fw->data + 4539 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4540 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4541 4542 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4543 for (i = 0; i < fw_size; i++) 4544 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4545 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4546 } 4547 4548 return 0; 4549 } 4550 4551 /* KIQ functions */ 4552 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) 4553 { 4554 uint32_t tmp; 4555 struct amdgpu_device *adev = ring->adev; 4556 4557 /* tell RLC which is KIQ queue */ 4558 tmp = RREG32(mmRLC_CP_SCHEDULERS); 4559 tmp &= 0xffffff00; 4560 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4561 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4562 tmp |= 0x80; 4563 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4564 } 4565 4566 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) 4567 { 4568 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 4569 uint32_t scratch, tmp = 0; 4570 uint64_t queue_mask = 0; 4571 int r, i; 4572 4573 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 4574 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 4575 continue; 4576 4577 /* This situation may be hit in the future if a new HW 4578 * generation exposes more than 64 queues. If so, the 4579 * definition of queue_mask needs updating */ 4580 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 4581 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 4582 break; 4583 } 4584 4585 queue_mask |= (1ull << i); 4586 } 4587 4588 r = amdgpu_gfx_scratch_get(adev, &scratch); 4589 if (r) { 4590 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 4591 return r; 4592 } 4593 WREG32(scratch, 0xCAFEDEAD); 4594 4595 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); 4596 if (r) { 4597 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 4598 amdgpu_gfx_scratch_free(adev, scratch); 4599 return r; 4600 } 4601 /* set resources */ 4602 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 4603 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ 4604 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 4605 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 4606 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 4607 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 4608 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 4609 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 4610 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4611 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4612 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 4613 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4614 4615 /* map queues */ 4616 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 4617 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 4618 amdgpu_ring_write(kiq_ring, 4619 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); 4620 amdgpu_ring_write(kiq_ring, 4621 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | 4622 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 4623 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 4624 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 
0 : 1)); /* doorbell */ 4625 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 4626 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 4627 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 4628 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4629 } 4630 /* write to scratch for completion */ 4631 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 4632 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 4633 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 4634 amdgpu_ring_commit(kiq_ring); 4635 4636 for (i = 0; i < adev->usec_timeout; i++) { 4637 tmp = RREG32(scratch); 4638 if (tmp == 0xDEADBEEF) 4639 break; 4640 DRM_UDELAY(1); 4641 } 4642 if (i >= adev->usec_timeout) { 4643 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", 4644 scratch, tmp); 4645 r = -EINVAL; 4646 } 4647 amdgpu_gfx_scratch_free(adev, scratch); 4648 4649 return r; 4650 } 4651 4652 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) 4653 { 4654 int i, r = 0; 4655 4656 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 4657 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); 4658 for (i = 0; i < adev->usec_timeout; i++) { 4659 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 4660 break; 4661 udelay(1); 4662 } 4663 if (i == adev->usec_timeout) 4664 r = -ETIMEDOUT; 4665 } 4666 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 4667 WREG32(mmCP_HQD_PQ_RPTR, 0); 4668 WREG32(mmCP_HQD_PQ_WPTR, 0); 4669 4670 return r; 4671 } 4672 4673 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) 4674 { 4675 struct amdgpu_device *adev = ring->adev; 4676 struct vi_mqd *mqd = ring->mqd_ptr; 4677 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4678 uint32_t tmp; 4679 4680 mqd->header = 0xC0310800; 4681 mqd->compute_pipelinestat_enable = 0x00000001; 4682 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4683 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4684 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4685 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4686 mqd->compute_misc_reserved = 0x00000003; 4687 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr 4688 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4689 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr 4690 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4691 eop_base_addr = ring->eop_gpu_addr >> 8; 4692 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4693 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4694 4695 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4696 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4697 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4698 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); 4699 4700 mqd->cp_hqd_eop_control = tmp; 4701 4702 /* enable doorbell? */ 4703 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), 4704 CP_HQD_PQ_DOORBELL_CONTROL, 4705 DOORBELL_EN, 4706 ring->use_doorbell ? 
1 : 0); 4707 4708 mqd->cp_hqd_pq_doorbell_control = tmp; 4709 4710 /* set the pointer to the MQD */ 4711 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 4712 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 4713 4714 /* set MQD vmid to 0 */ 4715 tmp = RREG32(mmCP_MQD_CONTROL); 4716 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4717 mqd->cp_mqd_control = tmp; 4718 4719 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */ 4720 hqd_gpu_addr = ring->gpu_addr >> 8; 4721 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4722 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4723 4724 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4725 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4726 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4727 (order_base_2(ring->ring_size / 4) - 1)); 4728 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4729 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4730 #ifdef __BIG_ENDIAN 4731 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4732 #endif 4733 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4734 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4735 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4736 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4737 mqd->cp_hqd_pq_control = tmp; 4738 4739 /* set the wb address whether it's enabled or not */ 4740 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4741 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4742 mqd->cp_hqd_pq_rptr_report_addr_hi = 4743 upper_32_bits(wb_gpu_addr) & 0xffff; 4744 4745 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4746 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4747 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4748 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4749 4750 tmp = 0; 4751 /* enable the doorbell if requested */ 4752 if (ring->use_doorbell) { 4753 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4754 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4755 DOORBELL_OFFSET, ring->doorbell_index); 4756 4757 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4758 DOORBELL_EN, 1); 4759 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4760 DOORBELL_SOURCE, 0); 4761 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4762 DOORBELL_HIT, 0); 4763 } 4764 4765 mqd->cp_hqd_pq_doorbell_control = tmp; 4766 4767 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4768 ring->wptr = 0; 4769 mqd->cp_hqd_pq_wptr = ring->wptr; 4770 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4771 4772 /* set the vmid for the queue */ 4773 mqd->cp_hqd_vmid = 0; 4774 4775 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4776 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4777 mqd->cp_hqd_persistent_state = tmp; 4778 4779 /* set MTYPE */ 4780 tmp = RREG32(mmCP_HQD_IB_CONTROL); 4781 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4782 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); 4783 mqd->cp_hqd_ib_control = tmp; 4784 4785 tmp = RREG32(mmCP_HQD_IQ_TIMER); 4786 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); 4787 mqd->cp_hqd_iq_timer = tmp; 4788 4789 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); 4790 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); 4791 mqd->cp_hqd_ctx_save_control = tmp; 4792 4793 /* defaults */ 4794 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); 4795 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
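/* these "defaults" snapshot the current HQD registers so that gfx_v8_0_mqd_commit() can program them back verbatim; note the EOP rptr/wptr among them are deliberately skipped on Tonga (see the errata comment in mqd_commit) */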
4796 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); 4797 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); 4798 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); 4799 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); 4800 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); 4801 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); 4802 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); 4803 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); 4804 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); 4805 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); 4806 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); 4807 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); 4808 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); 4809 4810 /* activate the queue */ 4811 mqd->cp_hqd_active = 1; 4812 4813 return 0; 4814 } 4815 4816 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 4817 struct vi_mqd *mqd) 4818 { 4819 uint32_t mqd_reg; 4820 uint32_t *mqd_data; 4821 4822 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ 4823 mqd_data = &mqd->cp_mqd_base_addr_lo; 4824 4825 /* disable wptr polling */ 4826 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); 4827 4828 /* program all HQD registers */ 4829 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++) 4830 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4831 4832 /* Tonga errata: EOP RPTR/WPTR should be left unmodified. 4833 * This is safe since EOP RPTR==WPTR for any inactive HQD 4834 * on ASICs that do not support context-save. 4835 * EOP writes/reads can start anywhere in the ring. 4836 */ 4837 if (adev->asic_type != CHIP_TONGA) { 4838 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); 4839 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); 4840 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); 4841 } 4842 4843 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) 4844 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4845 4846 /* activate the HQD */ 4847 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) 4848 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4849 4850 return 0; 4851 } 4852 4853 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) 4854 { 4855 struct amdgpu_device *adev = ring->adev; 4856 struct vi_mqd *mqd = ring->mqd_ptr; 4857 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 4858 4859 gfx_v8_0_kiq_setting(ring); 4860 4861 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4862 /* reset MQD to a clean status */ 4863 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4864 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4865 4866 /* reset ring buffer */ 4867 ring->wptr = 0; 4868 amdgpu_ring_clear_ring(ring); 4869 mutex_lock(&adev->srbm_mutex); 4870 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4871 gfx_v8_0_mqd_commit(adev, mqd); 4872 vi_srbm_select(adev, 0, 0, 0, 0); 4873 mutex_unlock(&adev->srbm_mutex); 4874 } else { 4875 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4876 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4877 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4878 mutex_lock(&adev->srbm_mutex); 4879 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4880 gfx_v8_0_mqd_init(ring); 4881 gfx_v8_0_mqd_commit(adev, mqd); 4882 vi_srbm_select(adev, 0, 0, 0, 0); 4883 mutex_unlock(&adev->srbm_mutex); 4884 4885 if 
(adev->gfx.mec.mqd_backup[mqd_idx]) 4886 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4887 } 4888 4889 return 0; 4890 } 4891 4892 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) 4893 { 4894 struct amdgpu_device *adev = ring->adev; 4895 struct vi_mqd *mqd = ring->mqd_ptr; 4896 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4897 4898 if (!adev->in_gpu_reset && !adev->gfx.in_suspend) { 4899 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4900 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4901 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4902 mutex_lock(&adev->srbm_mutex); 4903 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4904 gfx_v8_0_mqd_init(ring); 4905 vi_srbm_select(adev, 0, 0, 0, 0); 4906 mutex_unlock(&adev->srbm_mutex); 4907 4908 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4909 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4910 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4911 /* reset MQD to a clean status */ 4912 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4913 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4914 /* reset ring buffer */ 4915 ring->wptr = 0; 4916 amdgpu_ring_clear_ring(ring); 4917 } else { 4918 amdgpu_ring_clear_ring(ring); 4919 } 4920 return 0; 4921 } 4922 4923 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) 4924 { 4925 if (adev->asic_type > CHIP_TONGA) { 4926 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); 4927 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); 4928 } 4929 /* enable doorbells */ 4930 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4931 } 4932 4933 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) 4934 { 4935 struct amdgpu_ring *ring = NULL; 4936 int r = 0, i; 4937 4938 gfx_v8_0_cp_compute_enable(adev, true); 4939 4940 ring = &adev->gfx.kiq.ring; 4941 4942 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4943 if (unlikely(r != 0)) 4944 goto done; 4945 4946 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4947 if (!r) { 4948 r = gfx_v8_0_kiq_init_queue(ring); 4949 amdgpu_bo_kunmap(ring->mqd_obj); 4950 ring->mqd_ptr = NULL; 4951 } 4952 amdgpu_bo_unreserve(ring->mqd_obj); 4953 if (r) 4954 goto done; 4955 4956 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4957 ring = &adev->gfx.compute_ring[i]; 4958 4959 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4960 if (unlikely(r != 0)) 4961 goto done; 4962 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4963 if (!r) { 4964 r = gfx_v8_0_kcq_init_queue(ring); 4965 amdgpu_bo_kunmap(ring->mqd_obj); 4966 ring->mqd_ptr = NULL; 4967 } 4968 amdgpu_bo_unreserve(ring->mqd_obj); 4969 if (r) 4970 goto done; 4971 } 4972 4973 gfx_v8_0_set_mec_doorbell_range(adev); 4974 4975 r = gfx_v8_0_kiq_kcq_enable(adev); 4976 if (r) 4977 goto done; 4978 4979 /* Test KIQ */ 4980 ring = &adev->gfx.kiq.ring; 4981 ring->ready = true; 4982 r = amdgpu_ring_test_ring(ring); 4983 if (r) { 4984 ring->ready = false; 4985 goto done; 4986 } 4987 4988 /* Test KCQs */ 4989 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4990 ring = &adev->gfx.compute_ring[i]; 4991 ring->ready = true; 4992 r = amdgpu_ring_test_ring(ring); 4993 if (r) 4994 ring->ready = false; 4995 } 4996 4997 done: 4998 return r; 4999 } 5000 5001 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 5002 { 5003 int r; 5004 5005 if (!(adev->flags & AMD_IS_APU)) 5006 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5007 
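/* With AMDGPU_FW_LOAD_DIRECT the driver writes the CP/MEC microcode through the UCODE_ADDR/UCODE_DATA registers itself; with SMU-backed loading the images were staged earlier, so only the rings need bringing up here. */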
5008 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 5009 /* legacy firmware loading */ 5010 r = gfx_v8_0_cp_gfx_load_microcode(adev); 5011 if (r) 5012 return r; 5013 5014 r = gfx_v8_0_cp_compute_load_microcode(adev); 5015 if (r) 5016 return r; 5017 } 5018 5019 r = gfx_v8_0_cp_gfx_resume(adev); 5020 if (r) 5021 return r; 5022 5023 r = gfx_v8_0_kiq_resume(adev); 5024 if (r) 5025 return r; 5026 5027 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5028 5029 return 0; 5030 } 5031 5032 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 5033 { 5034 gfx_v8_0_cp_gfx_enable(adev, enable); 5035 gfx_v8_0_cp_compute_enable(adev, enable); 5036 } 5037 5038 static int gfx_v8_0_hw_init(void *handle) 5039 { 5040 int r; 5041 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5042 5043 gfx_v8_0_init_golden_registers(adev); 5044 gfx_v8_0_gpu_init(adev); 5045 5046 r = gfx_v8_0_rlc_resume(adev); 5047 if (r) 5048 return r; 5049 5050 r = gfx_v8_0_cp_resume(adev); 5051 5052 return r; 5053 } 5054 5055 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring) 5056 { 5057 struct amdgpu_device *adev = kiq_ring->adev; 5058 uint32_t scratch, tmp = 0; 5059 int r, i; 5060 5061 r = amdgpu_gfx_scratch_get(adev, &scratch); 5062 if (r) { 5063 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 5064 return r; 5065 } 5066 WREG32(scratch, 0xCAFEDEAD); 5067 5068 r = amdgpu_ring_alloc(kiq_ring, 10); 5069 if (r) { 5070 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 5071 amdgpu_gfx_scratch_free(adev, scratch); 5072 return r; 5073 } 5074 5075 /* unmap queues */ 5076 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 5077 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 5078 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 5079 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 5080 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 5081 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 5082 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 5083 amdgpu_ring_write(kiq_ring, 0); 5084 amdgpu_ring_write(kiq_ring, 0); 5085 amdgpu_ring_write(kiq_ring, 0); 5086 /* write to scratch for completion */ 5087 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 5088 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 5089 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 5090 amdgpu_ring_commit(kiq_ring); 5091 5092 for (i = 0; i < adev->usec_timeout; i++) { 5093 tmp = RREG32(scratch); 5094 if (tmp == 0xDEADBEEF) 5095 break; 5096 DRM_UDELAY(1); 5097 } 5098 if (i >= adev->usec_timeout) { 5099 DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); 5100 r = -EINVAL; 5101 } 5102 amdgpu_gfx_scratch_free(adev, scratch); 5103 return r; 5104 } 5105 5106 static int gfx_v8_0_hw_fini(void *handle) 5107 { 5108 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5109 int i; 5110 5111 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5112 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5113 5114 /* disable the KCQs so the CPC no longer touches memory that is about to become invalid */ 5115 for (i = 0; i < adev->gfx.num_compute_rings; i++) 5116 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); 5117 5118 if (amdgpu_sriov_vf(adev)) { 5119 pr_debug("For SRIOV client, nothing to do here.\n"); 5120 return 0; 5121 } 5122 gfx_v8_0_cp_enable(adev, false); 5123 gfx_v8_0_rlc_stop(adev); 5124 5125 amdgpu_device_ip_set_powergating_state(adev, 5126 AMD_IP_BLOCK_TYPE_GFX, 5127 AMD_PG_STATE_UNGATE); 5128 5129 return 0; 5130 }
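/* Suspend/resume simply reuse hw_fini/hw_init; the gfx.in_suspend flag set below lets gfx_v8_0_kcq_init_queue() tell a resume (MQD contents preserved, only the ring needs clearing) apart from a first-time init. */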
5131 5132 static int gfx_v8_0_suspend(void *handle) 5133 { 5134 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5135 adev->gfx.in_suspend = true; 5136 return gfx_v8_0_hw_fini(adev); 5137 } 5138 5139 static int gfx_v8_0_resume(void *handle) 5140 { 5141 int r; 5142 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5143 5144 r = gfx_v8_0_hw_init(adev); 5145 adev->gfx.in_suspend = false; 5146 return r; 5147 } 5148 5149 static bool gfx_v8_0_is_idle(void *handle) 5150 { 5151 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5152 5153 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5154 return false; 5155 else 5156 return true; 5157 } 5158 5159 static int gfx_v8_0_wait_for_idle(void *handle) 5160 { 5161 unsigned i; 5162 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5163 5164 for (i = 0; i < adev->usec_timeout; i++) { 5165 if (gfx_v8_0_is_idle(handle)) 5166 return 0; 5167 5168 udelay(1); 5169 } 5170 return -ETIMEDOUT; 5171 } 5172 5173 static bool gfx_v8_0_check_soft_reset(void *handle) 5174 { 5175 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5176 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5177 u32 tmp; 5178 5179 /* GRBM_STATUS */ 5180 tmp = RREG32(mmGRBM_STATUS); 5181 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5182 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5183 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5184 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5185 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5186 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5187 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 5188 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5189 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 5190 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5191 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 5192 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5193 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5194 } 5195 5196 /* GRBM_STATUS2 */ 5197 tmp = RREG32(mmGRBM_STATUS2); 5198 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 5199 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5200 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 5201 5202 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 5203 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 5204 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 5205 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5206 SOFT_RESET_CPF, 1); 5207 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5208 SOFT_RESET_CPC, 1); 5209 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5210 SOFT_RESET_CPG, 1); 5211 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 5212 SOFT_RESET_GRBM, 1); 5213 } 5214 5215 /* SRBM_STATUS */ 5216 tmp = RREG32(mmSRBM_STATUS); 5217 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 5218 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5219 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5220 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 5221 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5222 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5223 5224 if (grbm_soft_reset || srbm_soft_reset) { 5225 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5226 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5227 return true; 5228 } else { 5229 adev->gfx.grbm_soft_reset = 0; 5230 adev->gfx.srbm_soft_reset = 0; 5231 return false; 5232 } 5233 } 5234 5235 static int gfx_v8_0_pre_soft_reset(void *handle) 5236 { 5237 struct amdgpu_device *adev = (struct 
amdgpu_device *)handle; 5238 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5239 5240 if ((!adev->gfx.grbm_soft_reset) && 5241 (!adev->gfx.srbm_soft_reset)) 5242 return 0; 5243 5244 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5245 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5246 5247 /* stop the rlc */ 5248 gfx_v8_0_rlc_stop(adev); 5249 5250 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5251 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5252 /* Disable GFX parsing/prefetching */ 5253 gfx_v8_0_cp_gfx_enable(adev, false); 5254 5255 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5256 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5257 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5258 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5259 int i; 5260 5261 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5262 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5263 5264 mutex_lock(&adev->srbm_mutex); 5265 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5266 gfx_v8_0_deactivate_hqd(adev, 2); 5267 vi_srbm_select(adev, 0, 0, 0, 0); 5268 mutex_unlock(&adev->srbm_mutex); 5269 } 5270 /* Disable MEC parsing/prefetching */ 5271 gfx_v8_0_cp_compute_enable(adev, false); 5272 } 5273 5274 return 0; 5275 } 5276 5277 static int gfx_v8_0_soft_reset(void *handle) 5278 { 5279 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5280 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5281 u32 tmp; 5282 5283 if ((!adev->gfx.grbm_soft_reset) && 5284 (!adev->gfx.srbm_soft_reset)) 5285 return 0; 5286 5287 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5288 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5289 5290 if (grbm_soft_reset || srbm_soft_reset) { 5291 tmp = RREG32(mmGMCON_DEBUG); 5292 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5293 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5294 WREG32(mmGMCON_DEBUG, tmp); 5295 udelay(50); 5296 } 5297 5298 if (grbm_soft_reset) { 5299 tmp = RREG32(mmGRBM_SOFT_RESET); 5300 tmp |= grbm_soft_reset; 5301 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5302 WREG32(mmGRBM_SOFT_RESET, tmp); 5303 tmp = RREG32(mmGRBM_SOFT_RESET); 5304 5305 udelay(50); 5306 5307 tmp &= ~grbm_soft_reset; 5308 WREG32(mmGRBM_SOFT_RESET, tmp); 5309 tmp = RREG32(mmGRBM_SOFT_RESET); 5310 } 5311 5312 if (srbm_soft_reset) { 5313 tmp = RREG32(mmSRBM_SOFT_RESET); 5314 tmp |= srbm_soft_reset; 5315 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5316 WREG32(mmSRBM_SOFT_RESET, tmp); 5317 tmp = RREG32(mmSRBM_SOFT_RESET); 5318 5319 udelay(50); 5320 5321 tmp &= ~srbm_soft_reset; 5322 WREG32(mmSRBM_SOFT_RESET, tmp); 5323 tmp = RREG32(mmSRBM_SOFT_RESET); 5324 } 5325 5326 if (grbm_soft_reset || srbm_soft_reset) { 5327 tmp = RREG32(mmGMCON_DEBUG); 5328 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5329 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5330 WREG32(mmGMCON_DEBUG, tmp); 5331 } 5332 5333 /* Wait a little for things to settle down */ 5334 udelay(50); 5335 5336 return 0; 5337 } 5338 5339 static int gfx_v8_0_post_soft_reset(void *handle) 5340 { 5341 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5342 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5343 5344 if ((!adev->gfx.grbm_soft_reset) && 5345 (!adev->gfx.srbm_soft_reset)) 5346 return 0; 5347 5348 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5349 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5350 5351 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) 
|| 5352 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5353 gfx_v8_0_cp_gfx_resume(adev); 5354 5355 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5356 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5357 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5358 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5359 int i; 5360 5361 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5362 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5363 5364 mutex_lock(&adev->srbm_mutex); 5365 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5366 gfx_v8_0_deactivate_hqd(adev, 2); 5367 vi_srbm_select(adev, 0, 0, 0, 0); 5368 mutex_unlock(&adev->srbm_mutex); 5369 } 5370 gfx_v8_0_kiq_resume(adev); 5371 } 5372 gfx_v8_0_rlc_start(adev); 5373 5374 return 0; 5375 } 5376 5377 /** 5378 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5379 * 5380 * @adev: amdgpu_device pointer 5381 * 5382 * Fetches a GPU clock counter snapshot. 5383 * Returns the 64 bit clock counter snapshot. 5384 */ 5385 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5386 { 5387 uint64_t clock; 5388 5389 mutex_lock(&adev->gfx.gpu_clock_mutex); 5390 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5391 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5392 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5393 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5394 return clock; 5395 } 5396 5397 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5398 uint32_t vmid, 5399 uint32_t gds_base, uint32_t gds_size, 5400 uint32_t gws_base, uint32_t gws_size, 5401 uint32_t oa_base, uint32_t oa_size) 5402 { 5403 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 5404 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 5405 5406 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 5407 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 5408 5409 oa_base = oa_base >> AMDGPU_OA_SHIFT; 5410 oa_size = oa_size >> AMDGPU_OA_SHIFT; 5411 5412 /* GDS Base */ 5413 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5414 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5415 WRITE_DATA_DST_SEL(0))); 5416 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5417 amdgpu_ring_write(ring, 0); 5418 amdgpu_ring_write(ring, gds_base); 5419 5420 /* GDS Size */ 5421 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5422 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5423 WRITE_DATA_DST_SEL(0))); 5424 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5425 amdgpu_ring_write(ring, 0); 5426 amdgpu_ring_write(ring, gds_size); 5427 5428 /* GWS */ 5429 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5430 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5431 WRITE_DATA_DST_SEL(0))); 5432 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5433 amdgpu_ring_write(ring, 0); 5434 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5435 5436 /* OA */ 5437 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5438 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5439 WRITE_DATA_DST_SEL(0))); 5440 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5441 amdgpu_ring_write(ring, 0); 5442 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5443 } 5444 5445 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5446 { 5447 WREG32(mmSQ_IND_INDEX, 5448 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5449 (simd << 
SQ_IND_INDEX__SIMD_ID__SHIFT) | 5450 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5451 (SQ_IND_INDEX__FORCE_READ_MASK)); 5452 return RREG32(mmSQ_IND_DATA); 5453 } 5454 5455 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5456 uint32_t wave, uint32_t thread, 5457 uint32_t regno, uint32_t num, uint32_t *out) 5458 { 5459 WREG32(mmSQ_IND_INDEX, 5460 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5461 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5462 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5463 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5464 (SQ_IND_INDEX__FORCE_READ_MASK) | 5465 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5466 while (num--) 5467 *(out++) = RREG32(mmSQ_IND_DATA); 5468 } 5469 5470 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5471 { 5472 /* type 0 wave data */ 5473 dst[(*no_fields)++] = 0; 5474 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5475 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5476 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 5477 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5478 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5479 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5480 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5481 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5482 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5483 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5484 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5485 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5486 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5487 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5488 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5489 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5490 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5491 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5492 } 5493 5494 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5495 uint32_t wave, uint32_t start, 5496 uint32_t size, uint32_t *dst) 5497 { 5498 wave_read_regs( 5499 adev, simd, wave, 0, 5500 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5501 } 5502 5503 5504 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5505 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5506 .select_se_sh = &gfx_v8_0_select_se_sh, 5507 .read_wave_data = &gfx_v8_0_read_wave_data, 5508 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5509 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q 5510 }; 5511 5512 static int gfx_v8_0_early_init(void *handle) 5513 { 5514 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5515 5516 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5517 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 5518 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5519 gfx_v8_0_set_ring_funcs(adev); 5520 gfx_v8_0_set_irq_funcs(adev); 5521 gfx_v8_0_set_gds_init(adev); 5522 gfx_v8_0_set_rlc_funcs(adev); 5523 5524 return 0; 5525 } 5526 5527 static int gfx_v8_0_late_init(void *handle) 5528 { 5529 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5530 int r; 5531 5532 r = amdgpu_irq_get(adev, 
&adev->gfx.priv_reg_irq, 0); 5533 if (r) 5534 return r; 5535 5536 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5537 if (r) 5538 return r; 5539 5540 /* requires IBs so do in late init after IB pool is initialized */ 5541 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5542 if (r) 5543 return r; 5544 5545 amdgpu_device_ip_set_powergating_state(adev, 5546 AMD_IP_BLOCK_TYPE_GFX, 5547 AMD_PG_STATE_GATE); 5548 5549 return 0; 5550 } 5551 5552 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5553 bool enable) 5554 { 5555 if ((adev->asic_type == CHIP_POLARIS11) || 5556 (adev->asic_type == CHIP_POLARIS12) || 5557 (adev->asic_type == CHIP_VEGAM)) 5558 /* Send msg to SMU via Powerplay */ 5559 amdgpu_device_ip_set_powergating_state(adev, 5560 AMD_IP_BLOCK_TYPE_SMC, 5561 enable ? 5562 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); 5563 5564 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5565 } 5566 5567 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5568 bool enable) 5569 { 5570 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0); 5571 } 5572 5573 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5574 bool enable) 5575 { 5576 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5577 } 5578 5579 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5580 bool enable) 5581 { 5582 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5583 } 5584 5585 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5586 bool enable) 5587 { 5588 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5589 5590 /* Read any GFX register to wake up GFX. */ 5591 if (!enable) 5592 RREG32(mmDB_RENDER_CONTROL); 5593 } 5594 5595 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5596 bool enable) 5597 { 5598 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5599 cz_enable_gfx_cg_power_gating(adev, true); 5600 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5601 cz_enable_gfx_pipeline_power_gating(adev, true); 5602 } else { 5603 cz_enable_gfx_cg_power_gating(adev, false); 5604 cz_enable_gfx_pipeline_power_gating(adev, false); 5605 } 5606 } 5607 5608 static int gfx_v8_0_set_powergating_state(void *handle, 5609 enum amd_powergating_state state) 5610 { 5611 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5612 bool enable = (state == AMD_PG_STATE_GATE); 5613 5614 if (amdgpu_sriov_vf(adev)) 5615 return 0; 5616 5617 switch (adev->asic_type) { 5618 case CHIP_CARRIZO: 5619 case CHIP_STONEY: 5620 5621 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5622 cz_enable_sck_slow_down_on_power_up(adev, true); 5623 cz_enable_sck_slow_down_on_power_down(adev, true); 5624 } else { 5625 cz_enable_sck_slow_down_on_power_up(adev, false); 5626 cz_enable_sck_slow_down_on_power_down(adev, false); 5627 } 5628 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5629 cz_enable_cp_power_gating(adev, true); 5630 else 5631 cz_enable_cp_power_gating(adev, false); 5632 5633 cz_update_gfx_cg_power_gating(adev, enable); 5634 5635 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5636 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5637 else 5638 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5639 5640 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5641 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5642 else 5643 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5644 break; 
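/* The Polaris family only exposes the static/dynamic/quick MG flavors of GFX powergating; the CP and GFX-pipeline powergating handled above is Carrizo/Stoney-specific. */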
5645 case CHIP_POLARIS11: 5646 case CHIP_POLARIS12: 5647 case CHIP_VEGAM: 5648 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5649 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5650 else 5651 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5652 5653 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5654 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5655 else 5656 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5657 5658 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) 5659 polaris11_enable_gfx_quick_mg_power_gating(adev, true); 5660 else 5661 polaris11_enable_gfx_quick_mg_power_gating(adev, false); 5662 break; 5663 default: 5664 break; 5665 } 5666 5667 return 0; 5668 } 5669 5670 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) 5671 { 5672 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5673 int data; 5674 5675 if (amdgpu_sriov_vf(adev)) 5676 *flags = 0; 5677 5678 /* AMD_CG_SUPPORT_GFX_MGCG */ 5679 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5680 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) 5681 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5682 5683 /* AMD_CG_SUPPORT_GFX_CGCG */ 5684 data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5685 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5686 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5687 5688 /* AMD_CG_SUPPORT_GFX_CGLS */ 5689 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5690 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5691 5692 /* AMD_CG_SUPPORT_GFX_CGTS */ 5693 data = RREG32(mmCGTS_SM_CTRL_REG); 5694 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK)) 5695 *flags |= AMD_CG_SUPPORT_GFX_CGTS; 5696 5697 /* AMD_CG_SUPPORT_GFX_CGTS_LS */ 5698 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK)) 5699 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS; 5700 5701 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5702 data = RREG32(mmRLC_MEM_SLP_CNTL); 5703 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5704 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5705 5706 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5707 data = RREG32(mmCP_MEM_SLP_CNTL); 5708 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5709 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5710 } 5711 5712 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5713 uint32_t reg_addr, uint32_t cmd) 5714 { 5715 uint32_t data; 5716 5717 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5718 5719 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5720 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5721 5722 data = RREG32(mmRLC_SERDES_WR_CTRL); 5723 if (adev->asic_type == CHIP_STONEY) 5724 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5725 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5726 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5727 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5728 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5729 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5730 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5731 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5732 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5733 else 5734 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5735 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5736 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5737 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5738 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5739 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5740 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5741 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5742 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5743 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5744 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5745 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5746 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5747 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5748 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5749 5750 WREG32(mmRLC_SERDES_WR_CTRL, data); 5751 } 5752 5753 #define MSG_ENTER_RLC_SAFE_MODE 1 5754 #define MSG_EXIT_RLC_SAFE_MODE 0 5755 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5756 #define RLC_GPR_REG2__REQ__SHIFT 0 5757 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5758 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5759 5760 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5761 { 5762 u32 data; 5763 unsigned i; 5764 5765 data = RREG32(mmRLC_CNTL); 5766 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5767 return; 5768 5769 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5770 data |= RLC_SAFE_MODE__CMD_MASK; 5771 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5772 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5773 WREG32(mmRLC_SAFE_MODE, data); 5774 5775 for (i = 0; i < adev->usec_timeout; i++) { 5776 if ((RREG32(mmRLC_GPM_STAT) & 5777 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5778 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5779 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5780 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5781 break; 5782 udelay(1); 5783 } 5784 5785 for (i = 0; i < adev->usec_timeout; i++) { 5786 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5787 break; 5788 udelay(1); 5789 } 5790 adev->gfx.rlc.in_safe_mode = true; 5791 } 5792 } 5793 5794 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5795 { 5796 u32 data = 0; 5797 unsigned i; 5798 5799 data = RREG32(mmRLC_CNTL); 5800 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5801 return; 5802 5803 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5804 if (adev->gfx.rlc.in_safe_mode) { 5805 data |= RLC_SAFE_MODE__CMD_MASK; 5806 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5807 WREG32(mmRLC_SAFE_MODE, data); 5808 adev->gfx.rlc.in_safe_mode = false; 5809 } 5810 } 5811 5812 for (i = 0; i < adev->usec_timeout; i++) { 5813 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5814 break; 5815 udelay(1); 5816 } 5817 } 5818 5819 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5820 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5821 .exit_safe_mode = iceland_exit_rlc_safe_mode 5822 }; 5823 5824 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5825 bool enable) 5826 { 5827 uint32_t temp, data; 5828 5829 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5830 5831 /* It is disabled by HW by default */ 5832 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5833 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5834 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 5835 /* 1 - RLC memory Light sleep */ 5836 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 5837 5838 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 5839 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 5840 } 5841 5842 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5843 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5844 if (adev->flags & AMD_IS_APU) 5845 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5846 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5847 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5848 else 5849 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5850 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5851 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5852 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5853 5854 if (temp != data) 5855 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5856 5857 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle 
*/ 5858 gfx_v8_0_wait_for_rlc_serdes(adev); 5859 5860 /* 5 - clear mgcg override */ 5861 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5862 5863 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5864 /* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */ 5865 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5866 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5867 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5868 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 5869 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 5870 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 5871 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 5872 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 5873 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 5874 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 5875 if (temp != data) 5876 WREG32(mmCGTS_SM_CTRL_REG, data); 5877 } 5878 udelay(50); 5879 5880 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5881 gfx_v8_0_wait_for_rlc_serdes(adev); 5882 } else { 5883 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 5884 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5885 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5886 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5887 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5888 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5889 if (temp != data) 5890 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5891 5892 /* 2 - disable MGLS in RLC */ 5893 data = RREG32(mmRLC_MEM_SLP_CNTL); 5894 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5895 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5896 WREG32(mmRLC_MEM_SLP_CNTL, data); 5897 } 5898 5899 /* 3 - disable MGLS in CP */ 5900 data = RREG32(mmCP_MEM_SLP_CNTL); 5901 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5902 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5903 WREG32(mmCP_MEM_SLP_CNTL, data); 5904 } 5905 5906 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 5907 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5908 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 5909 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 5910 if (temp != data) 5911 WREG32(mmCGTS_SM_CTRL_REG, data); 5912 5913 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5914 gfx_v8_0_wait_for_rlc_serdes(adev); 5915 5916 /* 6 - set mgcg override */ 5917 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5918 5919 udelay(50); 5920 5921 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5922 gfx_v8_0_wait_for_rlc_serdes(adev); 5923 } 5924 5925 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5926 } 5927 5928 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5929 bool enable) 5930 { 5931 uint32_t temp, temp1, data, data1; 5932 5933 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5934 5935 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5936 5937 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5938 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5939 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 5940 if (temp1 != data1) 5941 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5942 5943 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5944 gfx_v8_0_wait_for_rlc_serdes(adev); 5945 5946 /* 2 - clear cgcg override */ 5947 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5948 5949 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5950 gfx_v8_0_wait_for_rlc_serdes(adev); 5951 5952 /* 3 - write cmd to set CGLS */ 5953 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 5954 5955 /* 4 - 
enable cgcg */ 5956 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5957 5958 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5959 /* enable cgls */ 5960 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5961 5962 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5963 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 5964 5965 if (temp1 != data1) 5966 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5967 } else { 5968 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5969 } 5970 5971 if (temp != data) 5972 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5973 5974 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/ 5975 * Cmp_busy/GFX_Idle interrupts 5976 */ 5977 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5978 } else { 5979 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 5980 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5981 5982 /* TEST CGCG */ 5983 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5984 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 5985 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 5986 if (temp1 != data1) 5987 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5988 5989 /* read gfx register to wake up cgcg */ 5990 RREG32(mmCB_CGTT_SCLK_CTRL); 5991 RREG32(mmCB_CGTT_SCLK_CTRL); 5992 RREG32(mmCB_CGTT_SCLK_CTRL); 5993 RREG32(mmCB_CGTT_SCLK_CTRL); 5994 5995 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5996 gfx_v8_0_wait_for_rlc_serdes(adev); 5997 5998 /* write cmd to Set CGCG Override */ 5999 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 6000 6001 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6002 gfx_v8_0_wait_for_rlc_serdes(adev); 6003 6004 /* write cmd to Clear CGLS */ 6005 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 6006 6007 /* disable cgcg, cgls should be disabled too. */ 6008 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 6009 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 6010 if (temp != data) 6011 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 6012 /* enable interrupts again for PG */ 6013 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 6014 } 6015 6016 gfx_v8_0_wait_for_rlc_serdes(adev); 6017 6018 adev->gfx.rlc.funcs->exit_safe_mode(adev); 6019 } 6020 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 6021 bool enable) 6022 { 6023 if (enable) { 6024 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 6025 * === MGCG + MGLS + TS(CG/LS) === 6026 */ 6027 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6028 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6029 } else { 6030 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 6031 * === CGCG + CGLS === 6032 */ 6033 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6034 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6035 } 6036 return 0; 6037 } 6038 6039 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 6040 enum amd_clockgating_state state) 6041 { 6042 uint32_t msg_id, pp_state = 0; 6043 uint32_t pp_support_state = 0; 6044 6045 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6046 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6047 pp_support_state = PP_STATE_SUPPORT_LS; 6048 pp_state = PP_STATE_LS; 6049 } 6050 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6051 pp_support_state |= PP_STATE_SUPPORT_CG; 6052 pp_state |= PP_STATE_CG; 6053 } 6054 if (state == AMD_CG_STATE_UNGATE) 6055 pp_state = 0; 6056 6057 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6058 PP_BLOCK_GFX_CG, 6059 pp_support_state, 6060 pp_state); 6061 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6062 
amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6063 } 6064 6065 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6066 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6067 pp_support_state = PP_STATE_SUPPORT_LS; 6068 pp_state = PP_STATE_LS; 6069 } 6070 6071 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6072 pp_support_state |= PP_STATE_SUPPORT_CG; 6073 pp_state |= PP_STATE_CG; 6074 } 6075 6076 if (state == AMD_CG_STATE_UNGATE) 6077 pp_state = 0; 6078 6079 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6080 PP_BLOCK_GFX_MG, 6081 pp_support_state, 6082 pp_state); 6083 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6084 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6085 } 6086 6087 return 0; 6088 } 6089 6090 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 6091 enum amd_clockgating_state state) 6092 { 6093 6094 uint32_t msg_id, pp_state = 0; 6095 uint32_t pp_support_state = 0; 6096 6097 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6098 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6099 pp_support_state = PP_STATE_SUPPORT_LS; 6100 pp_state = PP_STATE_LS; 6101 } 6102 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6103 pp_support_state |= PP_STATE_SUPPORT_CG; 6104 pp_state |= PP_STATE_CG; 6105 } 6106 if (state == AMD_CG_STATE_UNGATE) 6107 pp_state = 0; 6108 6109 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6110 PP_BLOCK_GFX_CG, 6111 pp_support_state, 6112 pp_state); 6113 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6114 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6115 } 6116 6117 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 6118 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 6119 pp_support_state = PP_STATE_SUPPORT_LS; 6120 pp_state = PP_STATE_LS; 6121 } 6122 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 6123 pp_support_state |= PP_STATE_SUPPORT_CG; 6124 pp_state |= PP_STATE_CG; 6125 } 6126 if (state == AMD_CG_STATE_UNGATE) 6127 pp_state = 0; 6128 6129 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6130 PP_BLOCK_GFX_3D, 6131 pp_support_state, 6132 pp_state); 6133 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6134 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6135 } 6136 6137 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6138 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6139 pp_support_state = PP_STATE_SUPPORT_LS; 6140 pp_state = PP_STATE_LS; 6141 } 6142 6143 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6144 pp_support_state |= PP_STATE_SUPPORT_CG; 6145 pp_state |= PP_STATE_CG; 6146 } 6147 6148 if (state == AMD_CG_STATE_UNGATE) 6149 pp_state = 0; 6150 6151 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6152 PP_BLOCK_GFX_MG, 6153 pp_support_state, 6154 pp_state); 6155 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6156 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6157 } 6158 6159 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 6160 pp_support_state = PP_STATE_SUPPORT_LS; 6161 6162 if (state == AMD_CG_STATE_UNGATE) 6163 pp_state = 0; 6164 else 6165 pp_state = PP_STATE_LS; 6166 6167 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6168 PP_BLOCK_GFX_RLC, 6169 pp_support_state, 6170 pp_state); 6171 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6172 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6173 } 6174 6175 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 6176 pp_support_state = PP_STATE_SUPPORT_LS; 6177 6178 if (state == AMD_CG_STATE_UNGATE) 6179 pp_state = 0; 6180 else 6181 pp_state = PP_STATE_LS; 6182 msg_id = 
PP_CG_MSG_ID(PP_GROUP_GFX, 6183 PP_BLOCK_GFX_CP, 6184 pp_support_state, 6185 pp_state); 6186 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6187 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6188 } 6189 6190 return 0; 6191 } 6192 6193 static int gfx_v8_0_set_clockgating_state(void *handle, 6194 enum amd_clockgating_state state) 6195 { 6196 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6197 6198 if (amdgpu_sriov_vf(adev)) 6199 return 0; 6200 6201 switch (adev->asic_type) { 6202 case CHIP_FIJI: 6203 case CHIP_CARRIZO: 6204 case CHIP_STONEY: 6205 gfx_v8_0_update_gfx_clock_gating(adev, 6206 state == AMD_CG_STATE_GATE); 6207 break; 6208 case CHIP_TONGA: 6209 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6210 break; 6211 case CHIP_POLARIS10: 6212 case CHIP_POLARIS11: 6213 case CHIP_POLARIS12: 6214 case CHIP_VEGAM: 6215 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6216 break; 6217 default: 6218 break; 6219 } 6220 return 0; 6221 } 6222 6223 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6224 { 6225 return ring->adev->wb.wb[ring->rptr_offs]; 6226 } 6227 6228 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6229 { 6230 struct amdgpu_device *adev = ring->adev; 6231 6232 if (ring->use_doorbell) 6233 /* XXX check if swapping is necessary on BE */ 6234 return ring->adev->wb.wb[ring->wptr_offs]; 6235 else 6236 return RREG32(mmCP_RB0_WPTR); 6237 } 6238 6239 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 6240 { 6241 struct amdgpu_device *adev = ring->adev; 6242 6243 if (ring->use_doorbell) { 6244 /* XXX check if swapping is necessary on BE */ 6245 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6246 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6247 } else { 6248 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 6249 (void)RREG32(mmCP_RB0_WPTR); 6250 } 6251 } 6252 6253 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 6254 { 6255 u32 ref_and_mask, reg_mem_engine; 6256 6257 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || 6258 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { 6259 switch (ring->me) { 6260 case 1: 6261 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6262 break; 6263 case 2: 6264 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6265 break; 6266 default: 6267 return; 6268 } 6269 reg_mem_engine = 0; 6270 } else { 6271 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6272 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6273 } 6274 6275 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6276 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6277 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6278 reg_mem_engine)); 6279 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 6280 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6281 amdgpu_ring_write(ring, ref_and_mask); 6282 amdgpu_ring_write(ring, ref_and_mask); 6283 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6284 } 6285 6286 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) 6287 { 6288 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6289 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) | 6290 EVENT_INDEX(4)); 6291 6292 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6293 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) | 6294 EVENT_INDEX(0)); 6295 } 6296 6297 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6298 struct amdgpu_ib *ib, 6299 unsigned vmid, bool ctx_switch) 6300 { 6301 u32 header, control = 0; 6302 6303 if 
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vmid, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vmid, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
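
/*
 * Gfx fence: an EVENT_WRITE_EOP packet that flushes the TC/TCL1
 * caches and writes the sequence number to the fence address once all
 * prior work has retired.  DATA_SEL selects a 32- vs 64-bit write
 * (1 vs 2), and INT_SEL(2) additionally raises an EOP interrupt so
 * the fence can be processed from the IH ring.
 */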
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
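
/*
 * Compute-pipe throttling used by the priority code below: a pipe
 * that holds a reservation gets its SPI_WCL_PIPE_PERCENT_* VALUE
 * field set to the full mask, while every unreserved pipe is dropped
 * to the minimum (0x1), starving it of wave launch bandwidth while a
 * high-priority queue runs.
 */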
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}

static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
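
/*
 * Program the hardware queue descriptor priority for one queue.  The
 * CP_HQD_* registers are instanced per me/pipe/queue, so the target
 * queue must first be selected through SRBM under srbm_mutex, and the
 * selection is reset to 0,0,0 before the mutex is dropped.  The
 * 0x2/0xf values are the pipe/queue priority levels used while a
 * high-priority compute queue holds the pipe.
 */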
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
					       enum drm_sched_priority priority)
{
	struct amdgpu_device *adev = ring->adev;
	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;

	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return;

	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
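
/*
 * CONTEXT_CONTROL preamble for the gfx ring.  dw2 is a bitmask of
 * state-load enables; the individual bits are spelled out in the
 * comments below.  As a worked example (not emitted verbatim
 * anywhere): a context switch with a preamble IB present sets
 * 0x80000000 | 0x8001 | 0x01000000 | 0x10002 | 0x10000000,
 * i.e. dw2 = 0x91018003.
 */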
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time a preamble
		 * is presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register */
			  (5 << 8) |	/* dst: memory */
			  (1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
					      adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
					      adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = 1 << 16; /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
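
/*
 * The per-pipe compute EOP interrupt enables live in one INT_CNTL
 * register per MEC pipe (mmCP_ME1_PIPE0_INT_CNTL..PIPE3); the switch
 * below maps the pipe index to the matching register before doing a
 * read-modify-write of the TIME_STAMP_INT_ENABLE bit.
 */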
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
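
/*
 * EOP interrupt handler.  The IH ring_id encodes the source queue as
 * a packed me/pipe/queue triple: pipe in bits [1:0], me in bits
 * [3:2], queue in bits [6:4].  For example, ring_id 0x25 decodes to
 * me 1, pipe 1, queue 2.
 */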
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting
			 * from VI.  The interrupt can only be enabled/disabled
			 * per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) &&
			    (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only supports GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
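
/*
 * Ring callbacks for the gfx ring.  emit_frame_size is the worst-case
 * number of dwords one submission may need outside its IBs (the
 * per-item accounting is in the comments below); the ring code uses
 * it to reserve space up front so a frame never wraps mid-packet.
 */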
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
			the first COND_EXEC jumps to the place just
			prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
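
/*
 * The KIQ (kernel interface queue) reuses the compute rptr/wptr and
 * IB emission helpers but has its own fence path (a plain WRITE_DATA
 * instead of RELEASE_MEM) and additionally wires up emit_rreg/
 * emit_wreg so that register accesses can be funneled through the
 * ring, which is how SR-IOV guests reach registers they cannot touch
 * directly via MMIO.
 */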
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}
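
/*
 * The active-CU bitmap for the currently selected SE/SH is derived by
 * OR-ing the hardware-fused inactive CUs (CC_GC_SHADER_ARRAY_CONFIG)
 * with the user-disabled ones (GC_USER_SHADER_ARRAY_CONFIG), taking
 * the complement, and masking it down to max_cu_per_sh valid bits.
 */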
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
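
/*
 * Mirror of the CE meta write above, but for the DE: the payload is
 * written into the de_payload slot of the per-ring CSA, and it also
 * records a GDS backup address placed 4KB past the CSA base.  The
 * "+ 4 - 2" converts the payload size in dwords into the PACKET3
 * count field: three extra body dwords for the control word and the
 * 64-bit destination address, minus one because the count is
 * off-by-one relative to the body length.
 */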
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr +
			offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr +
			offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}