/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)			((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)			((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)			((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)		((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)			((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)			((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)			((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)		((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)			((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,		/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,		/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,		/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,		/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,		/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
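/*
 * The golden settings tables below are consumed by
 * amdgpu_device_program_register_sequence() as {register offset, AND mask,
 * OR value} triplets: the bits selected by the AND mask are replaced with
 * the corresponding bits of the OR value, and an AND mask of 0xffffffff
 * writes the value verbatim.
 */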
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
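/*
 * gfx_v8_0_init_golden_registers() applies the per-ASIC golden register
 * sequences defined above; it is called from hw init, before the GFX
 * rings are brought up.
 */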
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* board-specific I2C writes for a few known Polaris10 SKUs */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("ib test on ring %d failed\n", ring->idx);
		r = -EINVAL;
	}

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}
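/*
 * Microcode handling: the VI command processor needs PFP, ME and CE ucode
 * for the gfx front end, MEC (and on most parts MEC2) ucode for compute,
 * and RLC ucode together with the register save/restore lists parsed out
 * of the RLC firmware header.
 */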
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_VEGAM:
		chip_name = "vegam";
		break;
	default:
		BUG();
	}

	/*
	 * Polaris parts have alternate "_2" ucode images; try those first
	 * and fall back to the original file names if they are not present.
	 */
	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs
	 * was formally released with feature version 46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
		le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		/* we also need to account for the JT (jump table) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}
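/*
 * The clear state buffer (CSB) is built as a PM4 stream: a begin-clear-
 * state preamble, CONTEXT_CONTROL, the SECT_CONTEXT extents from
 * vi_cs_data emitted as SET_CONTEXT_REG bursts, the harvested raster
 * config, an end-clear-state preamble and a final CLEAR_STATE packet.
 */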
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
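/*
 * RLC setup: gfx_v8_0_rlc_init() allocates and fills the clear state BO
 * and, on Carrizo and Stoney, the CP table BO that holds the jump tables
 * written by cz_init_cp_jump_table() above plus GDS backup space.
 */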
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

/*
 * Hand-assembled GCN shader blobs (raw machine code) used by
 * gfx_v8_0_do_edc_gpr_workarounds() below: the first initializes a run of
 * VGPRs, the second a run of SGPRs; both end with s_barrier (0xbf8a0000)
 * and s_endpgm (0xbf810000).
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
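/*
 * The *_init_regs tables below are {register, value} pairs; the EDC
 * workaround emits each pair as a SET_SH_REG packet to set up the static
 * dispatch state (thread counts, resource limits, user data) before
 * launching the GPR-initializing shaders.
 */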
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};

static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords for each SET_SH_REG reg/value pair, plus 4
	 * for the COMPUTE_PGM_LO/HI write, 5 for DISPATCH_DIRECT and 2 for
	 * the CS partial flush EVENT_WRITE (all in dwords, hence the * 4)
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);
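
	/* the IB carries the packet stream at offset 0, the VGPR shader at
	 * vgpr_offset and the SGPR shader at sgpr_offset; the shader bases
	 * are 256-byte aligned because COMPUTE_PGM_LO holds the address >> 8
	 */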
	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
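
/* set the per-ASIC gfx configuration limits and pick the golden
 * GB_ADDR_CONFIG for the board
 */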
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
	case CHIP_VEGAM:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}
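
	/* derive the DRAM row size from the memory controller so the
	 * ROW_SIZE field in GB_ADDR_CONFIG matches the board
	 */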
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* row size = 4 bytes * 2^(8 + NOOFCOLS) columns, in KB */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}

static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			+ (ring_id * GFX8_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
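
	/* EOP interrupt sources are enumerated per MEC pipe; map this
	 * ring's (me, pipe) pair onto the matching source
	 */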
	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}

static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);

static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 197,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 239,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add_id() for SQ failed: %d\n", r);
		return r;
	}

	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
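
/* undo everything gfx_v8_0_sw_init() set up, in reverse order */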
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				      &adev->gfx.rlc.cp_table_gpu_addr,
				      (void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}

static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	/* zero both tables first; entries not set below are either skipped
	 * by the write loops or written as zero
	 */
	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
	case CHIP_VEGAM:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_STONEY:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	default:
		dev_warn(adev->dev,
			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
			 adev->asic_type);
		/* fall through */

	case CHIP_CARRIZO:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	}
}
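/*
 * GRBM_GFX_INDEX steers subsequent register accesses to one shader engine
 * (SE), shader array (SH) and instance, or broadcasts them.  A field value
 * of 0xffffffff selects broadcast at that level; e.g.
 * gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff) makes the
 * following accesses hit every SE/SH/instance.  Callers in this file take
 * adev->grbm_idx_mutex around select/access/deselect sequences.
 */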
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
				  u32 se_num, u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32(mmGRBM_GFX_INDEX, data);
}

static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q)
{
	vi_srbm_select(adev, me, pipe, q, 0);
}

static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_RB_BACKEND_DISABLE) |
		RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}

static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_VEGAM:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
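/*
 * Harvesting note: rb_mask marks the render backends (RBs) that are
 * actually present.  The per-SE masks below are carved out of rb_mask;
 * e.g. with num_rb = 8 and num_se = 4 (illustrative values), rb_per_se = 2
 * and se_mask[] = {0x03, 0x0c, 0x30, 0xc0} & rb_mask.  Fully harvested SE
 * pairs, packers and RBs are then steered away from via the
 * SE_PAIR_MAP/SE_MAP/PKR_MAP/RB_MAP_PKR* fields of the raster config so
 * rasterization only targets live backends.
 */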
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}

static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
	    adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMID SH_MEM registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM config and aperture registers for the compute
 * VMIDs (8..15).
 */
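/*
 * SH_MEM_BASES packs two 16-bit aperture bases: private in the low half
 * and shared in the high half.  Each base holds bits 63:48 of the GPU
 * virtual address (gfx_v8_0_gpu_init() below likewise writes
 * adev->gmc.shared_aperture_start >> 48), so DEFAULT_SH_MEM_BASES = 0x6000
 * places the apertures at 0x60000000'00000000 as described in the comment
 * inside gfx_v8_0_init_compute_vmid().
 */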
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	default:
		adev->gfx.config.double_offchip_lds_buf = 1;
		break;
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		adev->gfx.config.double_offchip_lds_buf = 0;
		break;
	}
}
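/*
 * Note the per-VMID split in gfx_v8_0_gpu_init() below: VMID 0, which the
 * kernel driver itself uses, keeps DEFAULT_MTYPE = MTYPE_UC and a zero
 * SH_MEM_BASES, while all other VMIDs get the cached-noncoherent MTYPE_NC
 * default and inherit the shared aperture base from the GMC.
 */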
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
					  SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcast
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}

static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
	       adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
	       adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
	       adev->gfx.rlc.clear_state_size);
}
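/*
 * Layout of the RLC register-list-format blob, as inferred from the parser
 * below: it is a sequence of records terminated by 0xFFFFFFFF markers, and
 * within a record every third dword is the offset of an indirectly
 * accessed register.  gfx_v8_0_parse_ind_reg_list() records where each
 * record starts and rewrites those raw offsets into indices into
 * unique_indices[].
 */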
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
					int ind_offset,
					int list_size,
					int *unique_indices,
					int *indices_count,
					int max_indices,
					int *ind_start_offsets,
					int *offset_count,
					int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for a matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
			    register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}

static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
	       adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				    RLC_FormatDirectRegListLength,
				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				    unique_indices,
				    &indices_count,
				    ARRAY_SIZE(unique_indices),
				    indirect_start_offsets,
				    &offset_count,
				    ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
	       adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
		       indirect_start_offsets[i]);
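	/*
	 * Each RLC_SRM_INDEX_CNTL_ADDR_n/DATA_n pair advertises one indirect
	 * register to the save/restore machine.  The bit split used below
	 * (the packed entry masked with 0x3FFFF for the ADDR register and
	 * shifted right by 20 for the DATA register) mirrors how the RLC
	 * firmware packs these entries; it is not otherwise documented here.
	 */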
	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}

static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}

static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
}

static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}

static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12) ||
		   (adev->asic_type == CHIP_VEGAM)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}
}

static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* APUs (e.g. Carrizo) enable the CP interrupt only after the CP
	 * has been initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
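/*
 * Legacy (direct) microcode load for the RLC: the firmware image is
 * streamed a dword at a time through the RLC_GPM_UCODE_ADDR/DATA pair,
 * mirroring the CP PFP/CE/ME loaders further down.  Only the
 * AMDGPU_FW_LOAD_DIRECT path uses this; SMU-assisted loading skips it
 * (see gfx_v8_0_rlc_resume()).
 */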
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12 ||
	    adev->asic_type == CHIP_VEGAM) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy rlc firmware loading */
		r = gfx_v8_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}

static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}
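/*
 * Direct CP gfx microcode load: PFP, CE and ME are halted first via
 * gfx_v8_0_cp_gfx_enable(adev, false), then each block's ucode is written
 * dword-by-dword through its ADDR/DATA (or RAM_WADDR/RAM_DATA) register
 * pair, finishing with a write of the firmware version.
 */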
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}

static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
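/*
 * gfx_v8_0_cp_gfx_start() replays the clear-state sequence sized by
 * gfx_v8_0_get_csb_size(): preamble (2 dwords), context control (3), the
 * SECT_CONTEXT extents, raster config (4), end-of-preamble (2) and
 * CLEAR_STATE (2).  The extra "+ 4" in the ring allocation covers the
 * CE-partition SET_BASE packet emitted at the end.
 */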
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}

static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			    DOORBELL_RANGE_LOWER,
			    AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
	       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}

static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}

static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	udelay(50);
}
gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 4542 { 4543 const struct gfx_firmware_header_v1_0 *mec_hdr; 4544 const __le32 *fw_data; 4545 unsigned i, fw_size; 4546 4547 if (!adev->gfx.mec_fw) 4548 return -EINVAL; 4549 4550 gfx_v8_0_cp_compute_enable(adev, false); 4551 4552 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4553 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4554 4555 fw_data = (const __le32 *) 4556 (adev->gfx.mec_fw->data + 4557 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4558 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4559 4560 /* MEC1 */ 4561 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4562 for (i = 0; i < fw_size; i++) 4563 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4564 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4565 4566 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 4567 if (adev->gfx.mec2_fw) { 4568 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4569 4570 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4571 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4572 4573 fw_data = (const __le32 *) 4574 (adev->gfx.mec2_fw->data + 4575 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4576 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4577 4578 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4579 for (i = 0; i < fw_size; i++) 4580 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4581 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4582 } 4583 4584 return 0; 4585 } 4586 4587 /* KIQ functions */ 4588 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) 4589 { 4590 uint32_t tmp; 4591 struct amdgpu_device *adev = ring->adev; 4592 4593 /* tell RLC which is KIQ queue */ 4594 tmp = RREG32(mmRLC_CP_SCHEDULERS); 4595 tmp &= 0xffffff00; 4596 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4597 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4598 tmp |= 0x80; 4599 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4600 } 4601 4602 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) 4603 { 4604 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 4605 uint32_t scratch, tmp = 0; 4606 uint64_t queue_mask = 0; 4607 int r, i; 4608 4609 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 4610 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 4611 continue; 4612 4613 /* This situation may be hit in the future if a new HW 4614 * generation exposes more than 64 queues. 
If so, the 4615 * definition of queue_mask needs updating */ 4616 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 4617 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 4618 break; 4619 } 4620 4621 queue_mask |= (1ull << i); 4622 } 4623 4624 r = amdgpu_gfx_scratch_get(adev, &scratch); 4625 if (r) { 4626 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 4627 return r; 4628 } 4629 WREG32(scratch, 0xCAFEDEAD); 4630 4631 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); 4632 if (r) { 4633 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 4634 amdgpu_gfx_scratch_free(adev, scratch); 4635 return r; 4636 } 4637 /* set resources */ 4638 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 4639 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ 4640 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 4641 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 4642 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 4643 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 4644 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 4645 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 4646 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4647 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4648 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 4649 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4650 4651 /* map queues */ 4652 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 4653 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 4654 amdgpu_ring_write(kiq_ring, 4655 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); 4656 amdgpu_ring_write(kiq_ring, 4657 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | 4658 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 4659 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 4660 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 
0 : 1)); /* doorbell */ 4661 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 4662 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 4663 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 4664 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4665 } 4666 /* write to scratch for completion */ 4667 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 4668 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 4669 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 4670 amdgpu_ring_commit(kiq_ring); 4671 4672 for (i = 0; i < adev->usec_timeout; i++) { 4673 tmp = RREG32(scratch); 4674 if (tmp == 0xDEADBEEF) 4675 break; 4676 DRM_UDELAY(1); 4677 } 4678 if (i >= adev->usec_timeout) { 4679 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", 4680 scratch, tmp); 4681 r = -EINVAL; 4682 } 4683 amdgpu_gfx_scratch_free(adev, scratch); 4684 4685 return r; 4686 } 4687 4688 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) 4689 { 4690 int i, r = 0; 4691 4692 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 4693 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); 4694 for (i = 0; i < adev->usec_timeout; i++) { 4695 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 4696 break; 4697 udelay(1); 4698 } 4699 if (i == adev->usec_timeout) 4700 r = -ETIMEDOUT; 4701 } 4702 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 4703 WREG32(mmCP_HQD_PQ_RPTR, 0); 4704 WREG32(mmCP_HQD_PQ_WPTR, 0); 4705 4706 return r; 4707 } 4708 4709 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) 4710 { 4711 struct amdgpu_device *adev = ring->adev; 4712 struct vi_mqd *mqd = ring->mqd_ptr; 4713 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4714 uint32_t tmp; 4715 4716 mqd->header = 0xC0310800; 4717 mqd->compute_pipelinestat_enable = 0x00000001; 4718 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4719 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4720 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4721 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4722 mqd->compute_misc_reserved = 0x00000003; 4723 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr 4724 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4725 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr 4726 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4727 eop_base_addr = ring->eop_gpu_addr >> 8; 4728 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4729 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4730 4731 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4732 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4733 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4734 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); 4735 4736 mqd->cp_hqd_eop_control = tmp; 4737 4738 /* enable doorbell? */ 4739 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), 4740 CP_HQD_PQ_DOORBELL_CONTROL, 4741 DOORBELL_EN, 4742 ring->use_doorbell ? 
			1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			(order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4832 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); 4833 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); 4834 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); 4835 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); 4836 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); 4837 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); 4838 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); 4839 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); 4840 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); 4841 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); 4842 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); 4843 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); 4844 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); 4845 4846 /* activate the queue */ 4847 mqd->cp_hqd_active = 1; 4848 4849 return 0; 4850 } 4851 4852 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 4853 struct vi_mqd *mqd) 4854 { 4855 uint32_t mqd_reg; 4856 uint32_t *mqd_data; 4857 4858 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ 4859 mqd_data = &mqd->cp_mqd_base_addr_lo; 4860 4861 /* disable wptr polling */ 4862 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); 4863 4864 /* program all HQD registers */ 4865 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++) 4866 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4867 4868 /* Tonga errata: EOP RPTR/WPTR should be left unmodified. 4869 * This is safe since EOP RPTR==WPTR for any inactive HQD 4870 * on ASICs that do not support context-save. 4871 * EOP writes/reads can start anywhere in the ring. 4872 */ 4873 if (adev->asic_type != CHIP_TONGA) { 4874 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); 4875 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); 4876 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); 4877 } 4878 4879 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) 4880 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4881 4882 /* activate the HQD */ 4883 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) 4884 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4885 4886 return 0; 4887 } 4888 4889 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) 4890 { 4891 struct amdgpu_device *adev = ring->adev; 4892 struct vi_mqd *mqd = ring->mqd_ptr; 4893 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 4894 4895 gfx_v8_0_kiq_setting(ring); 4896 4897 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4898 /* reset MQD to a clean status */ 4899 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4900 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4901 4902 /* reset ring buffer */ 4903 ring->wptr = 0; 4904 amdgpu_ring_clear_ring(ring); 4905 mutex_lock(&adev->srbm_mutex); 4906 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4907 gfx_v8_0_mqd_commit(adev, mqd); 4908 vi_srbm_select(adev, 0, 0, 0, 0); 4909 mutex_unlock(&adev->srbm_mutex); 4910 } else { 4911 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4912 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4913 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4914 mutex_lock(&adev->srbm_mutex); 4915 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4916 gfx_v8_0_mqd_init(ring); 4917 gfx_v8_0_mqd_commit(adev, mqd); 4918 vi_srbm_select(adev, 0, 0, 0, 0); 4919 mutex_unlock(&adev->srbm_mutex); 4920 4921 if 
(adev->gfx.mec.mqd_backup[mqd_idx]) 4922 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4923 } 4924 4925 return 0; 4926 } 4927 4928 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) 4929 { 4930 struct amdgpu_device *adev = ring->adev; 4931 struct vi_mqd *mqd = ring->mqd_ptr; 4932 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4933 4934 if (!adev->in_gpu_reset && !adev->gfx.in_suspend) { 4935 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4936 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4937 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4938 mutex_lock(&adev->srbm_mutex); 4939 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4940 gfx_v8_0_mqd_init(ring); 4941 vi_srbm_select(adev, 0, 0, 0, 0); 4942 mutex_unlock(&adev->srbm_mutex); 4943 4944 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4945 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4946 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4947 /* reset MQD to a clean status */ 4948 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4949 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4950 /* reset ring buffer */ 4951 ring->wptr = 0; 4952 amdgpu_ring_clear_ring(ring); 4953 } else { 4954 amdgpu_ring_clear_ring(ring); 4955 } 4956 return 0; 4957 } 4958 4959 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) 4960 { 4961 if (adev->asic_type > CHIP_TONGA) { 4962 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); 4963 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); 4964 } 4965 /* enable doorbells */ 4966 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4967 } 4968 4969 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) 4970 { 4971 struct amdgpu_ring *ring = NULL; 4972 int r = 0, i; 4973 4974 gfx_v8_0_cp_compute_enable(adev, true); 4975 4976 ring = &adev->gfx.kiq.ring; 4977 4978 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4979 if (unlikely(r != 0)) 4980 goto done; 4981 4982 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4983 if (!r) { 4984 r = gfx_v8_0_kiq_init_queue(ring); 4985 amdgpu_bo_kunmap(ring->mqd_obj); 4986 ring->mqd_ptr = NULL; 4987 } 4988 amdgpu_bo_unreserve(ring->mqd_obj); 4989 if (r) 4990 goto done; 4991 4992 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4993 ring = &adev->gfx.compute_ring[i]; 4994 4995 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4996 if (unlikely(r != 0)) 4997 goto done; 4998 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4999 if (!r) { 5000 r = gfx_v8_0_kcq_init_queue(ring); 5001 amdgpu_bo_kunmap(ring->mqd_obj); 5002 ring->mqd_ptr = NULL; 5003 } 5004 amdgpu_bo_unreserve(ring->mqd_obj); 5005 if (r) 5006 goto done; 5007 } 5008 5009 gfx_v8_0_set_mec_doorbell_range(adev); 5010 5011 r = gfx_v8_0_kiq_kcq_enable(adev); 5012 if (r) 5013 goto done; 5014 5015 /* Test KIQ */ 5016 ring = &adev->gfx.kiq.ring; 5017 ring->ready = true; 5018 r = amdgpu_ring_test_ring(ring); 5019 if (r) { 5020 ring->ready = false; 5021 goto done; 5022 } 5023 5024 /* Test KCQs */ 5025 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5026 ring = &adev->gfx.compute_ring[i]; 5027 ring->ready = true; 5028 r = amdgpu_ring_test_ring(ring); 5029 if (r) 5030 ring->ready = false; 5031 } 5032 5033 done: 5034 return r; 5035 } 5036 5037 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 5038 { 5039 int r; 5040 5041 if (!(adev->flags & AMD_IS_APU)) 5042 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5043 
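	/* CP bring-up below: with the legacy direct-load path the PFP/CE/ME
	 * and MEC microcode is written through MMIO first, then the gfx ring
	 * is resumed, then the KIQ and compute queues; the GUI idle interrupt
	 * masked above is re-enabled once everything is up.
	 */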
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy firmware loading */
		r = gfx_v8_0_cp_gfx_load_microcode(adev);
		if (r)
			return r;

		r = gfx_v8_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}

static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}

static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}

static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t scratch, tmp = 0;
	int r, i;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	r = amdgpu_ring_alloc(kiq_ring, 10);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}

	/* unmap queues */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
			PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
			PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable the KCQs so the CPC does not touch memory that is no longer valid */
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
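	/* With the CP halted and the RLC stopped, drop GFX powergating back
	 * to the ungated state, presumably so a later hw_init starts from a
	 * known power state.
	 */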
amdgpu_device_ip_set_powergating_state(adev, 5166 AMD_IP_BLOCK_TYPE_GFX, 5167 AMD_PG_STATE_UNGATE); 5168 5169 return 0; 5170 } 5171 5172 static int gfx_v8_0_suspend(void *handle) 5173 { 5174 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5175 adev->gfx.in_suspend = true; 5176 return gfx_v8_0_hw_fini(adev); 5177 } 5178 5179 static int gfx_v8_0_resume(void *handle) 5180 { 5181 int r; 5182 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5183 5184 r = gfx_v8_0_hw_init(adev); 5185 adev->gfx.in_suspend = false; 5186 return r; 5187 } 5188 5189 static bool gfx_v8_0_is_idle(void *handle) 5190 { 5191 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5192 5193 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5194 return false; 5195 else 5196 return true; 5197 } 5198 5199 static int gfx_v8_0_wait_for_idle(void *handle) 5200 { 5201 unsigned i; 5202 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5203 5204 for (i = 0; i < adev->usec_timeout; i++) { 5205 if (gfx_v8_0_is_idle(handle)) 5206 return 0; 5207 5208 udelay(1); 5209 } 5210 return -ETIMEDOUT; 5211 } 5212 5213 static bool gfx_v8_0_check_soft_reset(void *handle) 5214 { 5215 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5216 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5217 u32 tmp; 5218 5219 /* GRBM_STATUS */ 5220 tmp = RREG32(mmGRBM_STATUS); 5221 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5222 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5223 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5224 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5225 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5226 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5227 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 5228 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5229 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 5230 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5231 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 5232 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5233 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5234 } 5235 5236 /* GRBM_STATUS2 */ 5237 tmp = RREG32(mmGRBM_STATUS2); 5238 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 5239 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5240 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 5241 5242 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 5243 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 5244 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 5245 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5246 SOFT_RESET_CPF, 1); 5247 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5248 SOFT_RESET_CPC, 1); 5249 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5250 SOFT_RESET_CPG, 1); 5251 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 5252 SOFT_RESET_GRBM, 1); 5253 } 5254 5255 /* SRBM_STATUS */ 5256 tmp = RREG32(mmSRBM_STATUS); 5257 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 5258 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5259 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5260 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 5261 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5262 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5263 5264 if (grbm_soft_reset || srbm_soft_reset) { 5265 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5266 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5267 return true; 5268 } else { 5269 adev->gfx.grbm_soft_reset = 0; 5270 adev->gfx.srbm_soft_reset = 0; 5271 return false; 
5272 } 5273 } 5274 5275 static int gfx_v8_0_pre_soft_reset(void *handle) 5276 { 5277 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5278 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5279 5280 if ((!adev->gfx.grbm_soft_reset) && 5281 (!adev->gfx.srbm_soft_reset)) 5282 return 0; 5283 5284 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5285 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5286 5287 /* stop the rlc */ 5288 gfx_v8_0_rlc_stop(adev); 5289 5290 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5291 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5292 /* Disable GFX parsing/prefetching */ 5293 gfx_v8_0_cp_gfx_enable(adev, false); 5294 5295 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5296 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5297 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5298 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5299 int i; 5300 5301 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5302 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5303 5304 mutex_lock(&adev->srbm_mutex); 5305 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5306 gfx_v8_0_deactivate_hqd(adev, 2); 5307 vi_srbm_select(adev, 0, 0, 0, 0); 5308 mutex_unlock(&adev->srbm_mutex); 5309 } 5310 /* Disable MEC parsing/prefetching */ 5311 gfx_v8_0_cp_compute_enable(adev, false); 5312 } 5313 5314 return 0; 5315 } 5316 5317 static int gfx_v8_0_soft_reset(void *handle) 5318 { 5319 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5320 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5321 u32 tmp; 5322 5323 if ((!adev->gfx.grbm_soft_reset) && 5324 (!adev->gfx.srbm_soft_reset)) 5325 return 0; 5326 5327 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5328 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5329 5330 if (grbm_soft_reset || srbm_soft_reset) { 5331 tmp = RREG32(mmGMCON_DEBUG); 5332 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5333 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5334 WREG32(mmGMCON_DEBUG, tmp); 5335 udelay(50); 5336 } 5337 5338 if (grbm_soft_reset) { 5339 tmp = RREG32(mmGRBM_SOFT_RESET); 5340 tmp |= grbm_soft_reset; 5341 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5342 WREG32(mmGRBM_SOFT_RESET, tmp); 5343 tmp = RREG32(mmGRBM_SOFT_RESET); 5344 5345 udelay(50); 5346 5347 tmp &= ~grbm_soft_reset; 5348 WREG32(mmGRBM_SOFT_RESET, tmp); 5349 tmp = RREG32(mmGRBM_SOFT_RESET); 5350 } 5351 5352 if (srbm_soft_reset) { 5353 tmp = RREG32(mmSRBM_SOFT_RESET); 5354 tmp |= srbm_soft_reset; 5355 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5356 WREG32(mmSRBM_SOFT_RESET, tmp); 5357 tmp = RREG32(mmSRBM_SOFT_RESET); 5358 5359 udelay(50); 5360 5361 tmp &= ~srbm_soft_reset; 5362 WREG32(mmSRBM_SOFT_RESET, tmp); 5363 tmp = RREG32(mmSRBM_SOFT_RESET); 5364 } 5365 5366 if (grbm_soft_reset || srbm_soft_reset) { 5367 tmp = RREG32(mmGMCON_DEBUG); 5368 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5369 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5370 WREG32(mmGMCON_DEBUG, tmp); 5371 } 5372 5373 /* Wait a little for things to settle down */ 5374 udelay(50); 5375 5376 return 0; 5377 } 5378 5379 static int gfx_v8_0_post_soft_reset(void *handle) 5380 { 5381 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5382 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5383 5384 if ((!adev->gfx.grbm_soft_reset) && 5385 (!adev->gfx.srbm_soft_reset)) 5386 return 0; 5387 5388 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5389 
srbm_soft_reset = adev->gfx.srbm_soft_reset; 5390 5391 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5392 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5393 gfx_v8_0_cp_gfx_resume(adev); 5394 5395 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5396 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5397 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5398 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5399 int i; 5400 5401 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5402 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5403 5404 mutex_lock(&adev->srbm_mutex); 5405 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5406 gfx_v8_0_deactivate_hqd(adev, 2); 5407 vi_srbm_select(adev, 0, 0, 0, 0); 5408 mutex_unlock(&adev->srbm_mutex); 5409 } 5410 gfx_v8_0_kiq_resume(adev); 5411 } 5412 gfx_v8_0_rlc_start(adev); 5413 5414 return 0; 5415 } 5416 5417 /** 5418 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5419 * 5420 * @adev: amdgpu_device pointer 5421 * 5422 * Fetches a GPU clock counter snapshot. 5423 * Returns the 64 bit clock counter snapshot. 5424 */ 5425 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5426 { 5427 uint64_t clock; 5428 5429 mutex_lock(&adev->gfx.gpu_clock_mutex); 5430 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5431 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5432 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5433 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5434 return clock; 5435 } 5436 5437 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5438 uint32_t vmid, 5439 uint32_t gds_base, uint32_t gds_size, 5440 uint32_t gws_base, uint32_t gws_size, 5441 uint32_t oa_base, uint32_t oa_size) 5442 { 5443 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 5444 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 5445 5446 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 5447 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 5448 5449 oa_base = oa_base >> AMDGPU_OA_SHIFT; 5450 oa_size = oa_size >> AMDGPU_OA_SHIFT; 5451 5452 /* GDS Base */ 5453 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5454 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5455 WRITE_DATA_DST_SEL(0))); 5456 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5457 amdgpu_ring_write(ring, 0); 5458 amdgpu_ring_write(ring, gds_base); 5459 5460 /* GDS Size */ 5461 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5462 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5463 WRITE_DATA_DST_SEL(0))); 5464 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5465 amdgpu_ring_write(ring, 0); 5466 amdgpu_ring_write(ring, gds_size); 5467 5468 /* GWS */ 5469 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5470 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5471 WRITE_DATA_DST_SEL(0))); 5472 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5473 amdgpu_ring_write(ring, 0); 5474 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5475 5476 /* OA */ 5477 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5478 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5479 WRITE_DATA_DST_SEL(0))); 5480 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5481 amdgpu_ring_write(ring, 0); 5482 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5483 } 5484 5485 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t 
wave, uint32_t address) 5486 { 5487 WREG32(mmSQ_IND_INDEX, 5488 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5489 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5490 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5491 (SQ_IND_INDEX__FORCE_READ_MASK)); 5492 return RREG32(mmSQ_IND_DATA); 5493 } 5494 5495 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5496 uint32_t wave, uint32_t thread, 5497 uint32_t regno, uint32_t num, uint32_t *out) 5498 { 5499 WREG32(mmSQ_IND_INDEX, 5500 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5501 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5502 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5503 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5504 (SQ_IND_INDEX__FORCE_READ_MASK) | 5505 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5506 while (num--) 5507 *(out++) = RREG32(mmSQ_IND_DATA); 5508 } 5509 5510 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5511 { 5512 /* type 0 wave data */ 5513 dst[(*no_fields)++] = 0; 5514 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5515 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5516 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 5517 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5518 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5519 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5520 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5521 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5522 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5523 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5524 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5525 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5526 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5527 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5528 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5529 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5530 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5531 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5532 } 5533 5534 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5535 uint32_t wave, uint32_t start, 5536 uint32_t size, uint32_t *dst) 5537 { 5538 wave_read_regs( 5539 adev, simd, wave, 0, 5540 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5541 } 5542 5543 5544 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5545 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5546 .select_se_sh = &gfx_v8_0_select_se_sh, 5547 .read_wave_data = &gfx_v8_0_read_wave_data, 5548 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5549 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q 5550 }; 5551 5552 static int gfx_v8_0_early_init(void *handle) 5553 { 5554 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5555 5556 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5557 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 5558 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5559 gfx_v8_0_set_ring_funcs(adev); 5560 gfx_v8_0_set_irq_funcs(adev); 5561 gfx_v8_0_set_gds_init(adev); 5562 gfx_v8_0_set_rlc_funcs(adev); 5563 5564 return 0; 5565 } 5566 5567 static int gfx_v8_0_late_init(void *handle) 
5568 { 5569 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5570 int r; 5571 5572 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5573 if (r) 5574 return r; 5575 5576 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5577 if (r) 5578 return r; 5579 5580 /* requires IBs so do in late init after IB pool is initialized */ 5581 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5582 if (r) 5583 return r; 5584 5585 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 5586 if (r) { 5587 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r); 5588 return r; 5589 } 5590 5591 r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0); 5592 if (r) { 5593 DRM_ERROR( 5594 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n", 5595 r); 5596 return r; 5597 } 5598 5599 return 0; 5600 } 5601 5602 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5603 bool enable) 5604 { 5605 if (((adev->asic_type == CHIP_POLARIS11) || 5606 (adev->asic_type == CHIP_POLARIS12) || 5607 (adev->asic_type == CHIP_VEGAM)) && 5608 adev->powerplay.pp_funcs->set_powergating_by_smu) 5609 /* Send msg to SMU via Powerplay */ 5610 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable); 5611 5612 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5613 } 5614 5615 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5616 bool enable) 5617 { 5618 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0); 5619 } 5620 5621 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5622 bool enable) 5623 { 5624 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5625 } 5626 5627 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5628 bool enable) 5629 { 5630 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5631 } 5632 5633 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5634 bool enable) 5635 { 5636 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5637 5638 /* Read any GFX register to wake up GFX. 
 */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}

static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}

static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}

	return 0;
}

static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |=
AMD_CG_SUPPORT_GFX_CGTS_LS; 5748 5749 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5750 data = RREG32(mmRLC_MEM_SLP_CNTL); 5751 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5752 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5753 5754 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5755 data = RREG32(mmCP_MEM_SLP_CNTL); 5756 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5757 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5758 } 5759 5760 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5761 uint32_t reg_addr, uint32_t cmd) 5762 { 5763 uint32_t data; 5764 5765 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5766 5767 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5768 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5769 5770 data = RREG32(mmRLC_SERDES_WR_CTRL); 5771 if (adev->asic_type == CHIP_STONEY) 5772 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5773 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5774 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5775 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5776 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5777 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5778 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5779 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5780 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5781 else 5782 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5783 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5784 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5785 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5786 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5787 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5788 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5789 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5790 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5791 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5792 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5793 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5794 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5795 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5796 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5797 5798 WREG32(mmRLC_SERDES_WR_CTRL, data); 5799 } 5800 5801 #define MSG_ENTER_RLC_SAFE_MODE 1 5802 #define MSG_EXIT_RLC_SAFE_MODE 0 5803 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5804 #define RLC_GPR_REG2__REQ__SHIFT 0 5805 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5806 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5807 5808 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5809 { 5810 u32 data; 5811 unsigned i; 5812 5813 data = RREG32(mmRLC_CNTL); 5814 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5815 return; 5816 5817 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5818 data |= RLC_SAFE_MODE__CMD_MASK; 5819 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5820 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5821 WREG32(mmRLC_SAFE_MODE, data); 5822 5823 for (i = 0; i < adev->usec_timeout; i++) { 5824 if ((RREG32(mmRLC_GPM_STAT) & 5825 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5826 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5827 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5828 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5829 break; 5830 udelay(1); 5831 } 5832 5833 for (i = 0; i < adev->usec_timeout; i++) { 5834 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5835 break; 5836 udelay(1); 5837 } 5838 adev->gfx.rlc.in_safe_mode = true; 5839 } 5840 } 5841 5842 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5843 { 5844 u32 data = 0; 5845 unsigned i; 5846 5847 data = RREG32(mmRLC_CNTL); 5848 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5849 return; 5850 5851 if (adev->cg_flags & 
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5852 if (adev->gfx.rlc.in_safe_mode) { 5853 data |= RLC_SAFE_MODE__CMD_MASK; 5854 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5855 WREG32(mmRLC_SAFE_MODE, data); 5856 adev->gfx.rlc.in_safe_mode = false; 5857 } 5858 } 5859 5860 for (i = 0; i < adev->usec_timeout; i++) { 5861 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5862 break; 5863 udelay(1); 5864 } 5865 } 5866 5867 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5868 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5869 .exit_safe_mode = iceland_exit_rlc_safe_mode 5870 }; 5871 5872 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5873 bool enable) 5874 { 5875 uint32_t temp, data; 5876 5877 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5878 5879 /* It is disabled by HW by default */ 5880 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5881 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5882 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 5883 /* 1 - RLC memory Light sleep */ 5884 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 5885 5886 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 5887 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 5888 } 5889 5890 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5891 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5892 if (adev->flags & AMD_IS_APU) 5893 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5894 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5895 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5896 else 5897 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5898 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5899 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5900 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5901 5902 if (temp != data) 5903 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5904 5905 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5906 gfx_v8_0_wait_for_rlc_serdes(adev); 5907 5908 /* 5 - clear mgcg override */ 5909 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5910 5911 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5912 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 5913 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5914 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5915 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5916 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 5917 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 5918 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 5919 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 5920 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 5921 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 5922 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 5923 if (temp != data) 5924 WREG32(mmCGTS_SM_CTRL_REG, data); 5925 } 5926 udelay(50); 5927 5928 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5929 gfx_v8_0_wait_for_rlc_serdes(adev); 5930 } else { 5931 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 5932 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5933 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5934 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5935 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5936 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5937 if (temp != data) 5938 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5939 5940 /* 2 - disable MGLS in RLC */ 5941 data = RREG32(mmRLC_MEM_SLP_CNTL); 5942 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5943 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5944 WREG32(mmRLC_MEM_SLP_CNTL, data); 5945 } 5946 5947 /* 3 - disable MGLS in CP */ 5948 data = RREG32(mmCP_MEM_SLP_CNTL); 5949 if (data & 
CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN,
CLE_BPM_SERDES_CMD); 6054 6055 /* disable cgcg, cgls should be disabled too. */ 6056 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 6057 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 6058 if (temp != data) 6059 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 6060 /* enable interrupts again for PG */ 6061 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 6062 } 6063 6064 gfx_v8_0_wait_for_rlc_serdes(adev); 6065 6066 adev->gfx.rlc.funcs->exit_safe_mode(adev); 6067 } 6068 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 6069 bool enable) 6070 { 6071 if (enable) { 6072 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 6073 * === MGCG + MGLS + TS(CG/LS) === 6074 */ 6075 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6076 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6077 } else { 6078 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 6079 * === CGCG + CGLS === 6080 */ 6081 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6082 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6083 } 6084 return 0; 6085 } 6086 6087 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 6088 enum amd_clockgating_state state) 6089 { 6090 uint32_t msg_id, pp_state = 0; 6091 uint32_t pp_support_state = 0; 6092 6093 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6094 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6095 pp_support_state = PP_STATE_SUPPORT_LS; 6096 pp_state = PP_STATE_LS; 6097 } 6098 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6099 pp_support_state |= PP_STATE_SUPPORT_CG; 6100 pp_state |= PP_STATE_CG; 6101 } 6102 if (state == AMD_CG_STATE_UNGATE) 6103 pp_state = 0; 6104 6105 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6106 PP_BLOCK_GFX_CG, 6107 pp_support_state, 6108 pp_state); 6109 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6110 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6111 } 6112 6113 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6114 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6115 pp_support_state = PP_STATE_SUPPORT_LS; 6116 pp_state = PP_STATE_LS; 6117 } 6118 6119 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6120 pp_support_state |= PP_STATE_SUPPORT_CG; 6121 pp_state |= PP_STATE_CG; 6122 } 6123 6124 if (state == AMD_CG_STATE_UNGATE) 6125 pp_state = 0; 6126 6127 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6128 PP_BLOCK_GFX_MG, 6129 pp_support_state, 6130 pp_state); 6131 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6132 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6133 } 6134 6135 return 0; 6136 } 6137 6138 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 6139 enum amd_clockgating_state state) 6140 { 6141 6142 uint32_t msg_id, pp_state = 0; 6143 uint32_t pp_support_state = 0; 6144 6145 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6146 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6147 pp_support_state = PP_STATE_SUPPORT_LS; 6148 pp_state = PP_STATE_LS; 6149 } 6150 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6151 pp_support_state |= PP_STATE_SUPPORT_CG; 6152 pp_state |= PP_STATE_CG; 6153 } 6154 if (state == AMD_CG_STATE_UNGATE) 6155 pp_state = 0; 6156 6157 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6158 PP_BLOCK_GFX_CG, 6159 pp_support_state, 6160 pp_state); 6161 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6162 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6163 } 6164 6165 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 6166 if 
(adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 6167 pp_support_state = PP_STATE_SUPPORT_LS; 6168 pp_state = PP_STATE_LS; 6169 } 6170 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 6171 pp_support_state |= PP_STATE_SUPPORT_CG; 6172 pp_state |= PP_STATE_CG; 6173 } 6174 if (state == AMD_CG_STATE_UNGATE) 6175 pp_state = 0; 6176 6177 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6178 PP_BLOCK_GFX_3D, 6179 pp_support_state, 6180 pp_state); 6181 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6182 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6183 } 6184 6185 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6186 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6187 pp_support_state = PP_STATE_SUPPORT_LS; 6188 pp_state = PP_STATE_LS; 6189 } 6190 6191 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6192 pp_support_state |= PP_STATE_SUPPORT_CG; 6193 pp_state |= PP_STATE_CG; 6194 } 6195 6196 if (state == AMD_CG_STATE_UNGATE) 6197 pp_state = 0; 6198 6199 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6200 PP_BLOCK_GFX_MG, 6201 pp_support_state, 6202 pp_state); 6203 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6204 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6205 } 6206 6207 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 6208 pp_support_state = PP_STATE_SUPPORT_LS; 6209 6210 if (state == AMD_CG_STATE_UNGATE) 6211 pp_state = 0; 6212 else 6213 pp_state = PP_STATE_LS; 6214 6215 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6216 PP_BLOCK_GFX_RLC, 6217 pp_support_state, 6218 pp_state); 6219 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6220 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6221 } 6222 6223 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 6224 pp_support_state = PP_STATE_SUPPORT_LS; 6225 6226 if (state == AMD_CG_STATE_UNGATE) 6227 pp_state = 0; 6228 else 6229 pp_state = PP_STATE_LS; 6230 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6231 PP_BLOCK_GFX_CP, 6232 pp_support_state, 6233 pp_state); 6234 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6235 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6236 } 6237 6238 return 0; 6239 } 6240 6241 static int gfx_v8_0_set_clockgating_state(void *handle, 6242 enum amd_clockgating_state state) 6243 { 6244 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6245 6246 if (amdgpu_sriov_vf(adev)) 6247 return 0; 6248 6249 switch (adev->asic_type) { 6250 case CHIP_FIJI: 6251 case CHIP_CARRIZO: 6252 case CHIP_STONEY: 6253 gfx_v8_0_update_gfx_clock_gating(adev, 6254 state == AMD_CG_STATE_GATE); 6255 break; 6256 case CHIP_TONGA: 6257 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6258 break; 6259 case CHIP_POLARIS10: 6260 case CHIP_POLARIS11: 6261 case CHIP_POLARIS12: 6262 case CHIP_VEGAM: 6263 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6264 break; 6265 default: 6266 break; 6267 } 6268 return 0; 6269 } 6270 6271 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6272 { 6273 return ring->adev->wb.wb[ring->rptr_offs]; 6274 } 6275 6276 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6277 { 6278 struct amdgpu_device *adev = ring->adev; 6279 6280 if (ring->use_doorbell) 6281 /* XXX check if swapping is necessary on BE */ 6282 return ring->adev->wb.wb[ring->wptr_offs]; 6283 else 6284 return RREG32(mmCP_RB0_WPTR); 6285 } 6286 6287 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 6288 { 6289 struct amdgpu_device *adev = ring->adev; 6290 6291 if (ring->use_doorbell) { 6292 /* XXX check if swapping is necessary on BE */ 6293 adev->wb.wb[ring->wptr_offs] = 
static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}

static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		/* XXX check if swapping is necessary on BE */
		return ring->adev->wb.wb[ring->wptr_offs];
	else
		return RREG32(mmCP_RB0_WPTR);
}

static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}

static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}

static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}

static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vmid, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vmid, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

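/*
 * Gfx fences use an EVENT_WRITE_EOP packet: the CP flushes the TC/TCL1
 * caches, writes the 32- or 64-bit sequence number to the fence address
 * and optionally raises an interrupt, depending on the fence flags.
 */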
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}

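/*
 * Wave launch throttling via SPI_WCL_PIPE_PERCENT_*: pipes holding a
 * reservation run at the full rate (VALUE_MASK), all others are throttled
 * down to the minimum value (0x1).
 */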
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}

static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}

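/*
 * Raise or restore the HQD pipe/queue priority for this ring; the HQD
 * registers are banked, so the queue has to be selected through SRBM
 * before the priority registers are written.
 */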
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
					       enum drm_sched_priority priority)
{
	struct amdgpu_device *adev = ring->adev;
	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;

	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return;

	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

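/*
 * CONTEXT_CONTROL: dw2 selects which state blocks the CP (re)loads on a
 * context switch - global/uconfig, per-context and SH register state, and
 * optionally CE RAM when a preamble IB is present.
 */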
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time a preamble
		 * is presented, although no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

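/*
 * Conditional execution, used for preemption: COND_EXEC discards the
 * following DWs when *cond_exe_gpu_addr == 0. The DW count is unknown at
 * emit time, so a dummy value is written here and patched in later once
 * the end of the frame is known.
 */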
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = 1 << 16; /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

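/*
 * CP ECC/EDC error interrupts have to be toggled in every consumer: the
 * global CP/CPC controls, all three gfx rings and each MEC pipe.
 */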
static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 unsigned int type,
					 enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 0;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 1;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);

	return 0;
}

static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     unsigned int type,
				     enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 1;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 0;
		break;

	default:
		return -EINVAL;
	}

	/* note: the sense is inverted here because the interrupt is
	 * masked by stalling the SQ message path (STALL = 1) rather
	 * than through a conventional enable bit
	 */
	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
		     enable_flag);

	return 0;
}

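/*
 * Fence processing for EOP interrupts: the ring is identified by the
 * me/pipe/queue fields packed into the IH ring_id and matched against
 * the gfx and compute rings.
 */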
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting
			 * from VI. The interrupt can only be enabled/disabled
			 * per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}

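/*
 * Decode an SQ interrupt word. Encoding 0 is the auto/general-purpose
 * format; encodings 1 and 2 are wave-level instruction and EDC/ECC
 * notifications, for which the EDC source can additionally be read from
 * SQ_EDC_INFO when running in task context.
 */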
static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
{
	u32 enc, se_id, sh_id, cu_id;
	char type[20];
	int sq_edc_source = -1;

	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);

	switch (enc) {
	case 0:
		DRM_INFO("SQ general purpose intr detected: "
			 "se_id %d, immed_overflow %d, host_reg_overflow %d, "
			 "host_cmd_overflow %d, cmd_timestamp %d, "
			 "reg_timestamp %d, thread_trace_buff_full %d, "
			 "wlt %d, thread_trace %d.\n",
			 se_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
			 );
		break;
	case 1:
	case 2:

		cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
		sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);

		/*
		 * This function can be called either directly from ISR
		 * or from BH in which case we can access SQ_EDC_INFO
		 * instance
		 */
		if (in_task()) {
			mutex_lock(&adev->grbm_idx_mutex);
			gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);

			sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);

			gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
			mutex_unlock(&adev->grbm_idx_mutex);
		}

		if (enc == 1)
			sprintf(type, "instruction intr");
		else
			sprintf(type, "EDC/ECC error");

		DRM_INFO(
			"SQ %s detected: "
			"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
			"trap %s, sq_ed_info.source %s.\n",
			type, se_id, sh_id, cu_id,
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
			(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
			);
		break;
	default:
		DRM_ERROR("SQ invalid encoding type\n");
	}
}

static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
	struct sq_work *sq_work = container_of(work, struct sq_work, work);

	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
}

static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
			   struct amdgpu_irq_src *source,
			   struct amdgpu_iv_entry *entry)
{
	unsigned ih_data = entry->src_data[0];

	/*
	 * Try to submit work so SQ_EDC_INFO can be accessed from
	 * BH. If previous work submission hasn't finished yet
	 * just print whatever info is possible directly from the ISR.
	 */
	if (work_pending(&adev->gfx.sq_work.work)) {
		gfx_v8_0_parse_sq_irq(adev, ih_data);
	} else {
		adev->gfx.sq_work.ih_data = ih_data;
		schedule_work(&adev->gfx.sq_work.work);
	}

	return 0;
}

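/*
 * The KIQ only uses the GENERIC2 interrupt: enable it both in the global
 * CPC control and in the INT_CNTL register of the MEC pipe that hosts the
 * KIQ ring.
 */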
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
		      * the first COND_EXEC jump to the place just
		      * prior to this double SWITCH_BUFFER
		      */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

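/*
 * Walk every SE/SH, apply the user-requested CU disable masks and build
 * the active and always-on CU bitmaps in adev->gfx.cu_info.
 */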
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

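/*
 * CE/DE metadata used for mid-command-buffer preemption under SR-IOV:
 * the payload layout depends on chained-IB support and is written into
 * the per-ring context save area (CSA) via WRITE_DATA.
 */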
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}

static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}