1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 #include <linux/kernel.h> 24 #include <linux/firmware.h> 25 #include <drm/drmP.h> 26 #include "amdgpu.h" 27 #include "amdgpu_gfx.h" 28 #include "vi.h" 29 #include "vi_structs.h" 30 #include "vid.h" 31 #include "amdgpu_ucode.h" 32 #include "amdgpu_atombios.h" 33 #include "atombios_i2c.h" 34 #include "clearstate_vi.h" 35 36 #include "gmc/gmc_8_2_d.h" 37 #include "gmc/gmc_8_2_sh_mask.h" 38 39 #include "oss/oss_3_0_d.h" 40 #include "oss/oss_3_0_sh_mask.h" 41 42 #include "bif/bif_5_0_d.h" 43 #include "bif/bif_5_0_sh_mask.h" 44 #include "gca/gfx_8_0_d.h" 45 #include "gca/gfx_8_0_enum.h" 46 #include "gca/gfx_8_0_sh_mask.h" 47 #include "gca/gfx_8_0_enum.h" 48 49 #include "dce/dce_10_0_d.h" 50 #include "dce/dce_10_0_sh_mask.h" 51 52 #include "smu/smu_7_1_3_d.h" 53 54 #include "ivsrcid/ivsrcid_vislands30.h" 55 56 #define GFX8_NUM_GFX_RINGS 1 57 #define GFX8_MEC_HPD_SIZE 2048 58 59 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 60 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 61 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002 62 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 63 64 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) 65 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT) 66 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT) 67 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT) 68 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT) 69 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT) 70 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT) 71 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT) 72 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT) 73 74 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L 75 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L 76 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L 77 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L 78 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L 79 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L 80 81 /* BPM SERDES CMD */ 82 #define SET_BPM_SERDES_CMD 1 83 #define CLE_BPM_SERDES_CMD 0 84 85 /* BPM Register Address*/ 86 enum { 87 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */ 88 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */ 89 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */ 90 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */ 91 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */ 92 BPM_REG_FGCG_MAX 93 }; 94 95 #define RLC_FormatDirectRegListLength 14 96 97 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin"); 98 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin"); 99 MODULE_FIRMWARE("amdgpu/carrizo_me.bin"); 100 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin"); 101 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin"); 102 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin"); 103 104 MODULE_FIRMWARE("amdgpu/stoney_ce.bin"); 105 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin"); 106 MODULE_FIRMWARE("amdgpu/stoney_me.bin"); 107 MODULE_FIRMWARE("amdgpu/stoney_mec.bin"); 108 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin"); 109 110 MODULE_FIRMWARE("amdgpu/tonga_ce.bin"); 111 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin"); 112 MODULE_FIRMWARE("amdgpu/tonga_me.bin"); 113 MODULE_FIRMWARE("amdgpu/tonga_mec.bin"); 114 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin"); 115 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin"); 116 117 MODULE_FIRMWARE("amdgpu/topaz_ce.bin"); 118 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin"); 119 MODULE_FIRMWARE("amdgpu/topaz_me.bin"); 120 MODULE_FIRMWARE("amdgpu/topaz_mec.bin"); 121 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin"); 122 123 MODULE_FIRMWARE("amdgpu/fiji_ce.bin"); 124 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin"); 125 MODULE_FIRMWARE("amdgpu/fiji_me.bin"); 126 MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); 127 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); 128 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); 129 130 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); 131 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin"); 132 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); 133 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin"); 134 MODULE_FIRMWARE("amdgpu/polaris10_me.bin"); 135 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin"); 136 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin"); 137 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin"); 138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); 139 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin"); 140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); 141 142 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); 143 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin"); 144 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); 145 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin"); 146 MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); 147 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin"); 148 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); 149 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin"); 150 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); 151 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin"); 152 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); 153 154 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin"); 155 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin"); 156 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin"); 157 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin"); 158 MODULE_FIRMWARE("amdgpu/polaris12_me.bin"); 159 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin"); 160 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin"); 161 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin"); 162 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin"); 163 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin"); 164 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin"); 165 166 MODULE_FIRMWARE("amdgpu/vegam_ce.bin"); 167 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin"); 168 MODULE_FIRMWARE("amdgpu/vegam_me.bin"); 169 MODULE_FIRMWARE("amdgpu/vegam_mec.bin"); 170 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin"); 171 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin"); 172 173 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = 174 { 175 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, 176 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1}, 177 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2}, 178 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3}, 179 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4}, 180 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5}, 181 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6}, 182 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7}, 183 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8}, 184 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9}, 185 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10}, 186 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11}, 187 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12}, 188 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13}, 189 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14}, 190 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15} 191 }; 192 193 static const u32 golden_settings_tonga_a11[] = 194 { 195 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208, 196 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 197 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 198 mmGB_GPU_ID, 0x0000000f, 0x00000000, 199 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 200 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc, 201 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 202 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 203 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 204 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 205 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 206 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 207 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb, 208 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b, 209 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876, 210 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 211 }; 212 213 static const u32 tonga_golden_common_all[] = 214 { 215 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 216 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 217 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 218 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 219 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 220 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 221 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 222 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF 223 }; 224 225 static const u32 tonga_mgcg_cgcg_init[] = 226 { 227 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 228 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 229 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 230 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 231 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 232 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 233 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 234 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 235 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 236 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 237 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 238 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 239 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 240 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 241 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 242 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 243 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 244 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 245 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 246 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 247 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 248 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 249 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 250 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 251 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 252 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 253 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 254 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 255 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 256 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 257 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 258 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 259 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 260 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 261 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 262 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 263 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 264 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 265 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 266 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 267 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 268 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 269 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 270 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 271 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 272 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 273 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 274 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 275 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 276 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 277 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 278 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 279 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 280 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 281 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 282 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 283 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 284 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 285 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 286 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 287 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 288 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 289 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 290 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 291 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 292 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 293 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 294 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 295 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 296 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 297 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 298 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 299 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 300 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 301 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 302 }; 303 304 static const u32 golden_settings_vegam_a11[] = 305 { 306 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, 307 mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000, 308 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 309 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 310 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 311 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 312 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a, 313 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e, 314 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 315 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 316 mmSQ_CONFIG, 0x07f80000, 0x01180000, 317 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 318 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 319 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, 320 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 321 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054, 322 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 323 }; 324 325 static const u32 vegam_golden_common_all[] = 326 { 327 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 328 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 329 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 330 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 331 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 332 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 333 }; 334 335 static const u32 golden_settings_polaris11_a11[] = 336 { 337 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208, 338 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, 339 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 340 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 341 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 342 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 343 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 344 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 345 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 346 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 347 mmSQ_CONFIG, 0x07f80000, 0x01180000, 348 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 349 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 350 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3, 351 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 352 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210, 353 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 354 }; 355 356 static const u32 polaris11_golden_common_all[] = 357 { 358 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 359 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, 360 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 361 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 362 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 363 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 364 }; 365 366 static const u32 golden_settings_polaris10_a11[] = 367 { 368 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200, 369 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, 370 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, 371 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 372 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 373 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 374 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 375 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 376 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a, 377 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 378 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 379 mmSQ_CONFIG, 0x07f80000, 0x07180000, 380 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 381 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 382 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, 383 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 384 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 385 }; 386 387 static const u32 polaris10_golden_common_all[] = 388 { 389 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 390 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 391 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 392 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 393 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 394 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 395 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 396 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 397 }; 398 399 static const u32 fiji_golden_common_all[] = 400 { 401 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 402 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a, 403 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e, 404 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 405 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 406 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 407 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 408 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 409 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 410 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009, 411 }; 412 413 static const u32 golden_settings_fiji_a10[] = 414 { 415 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 416 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 417 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 418 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 419 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 420 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 421 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 422 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 423 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 424 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff, 425 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 426 }; 427 428 static const u32 fiji_mgcg_cgcg_init[] = 429 { 430 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 431 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 432 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 433 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 434 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 435 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 436 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 437 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 438 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 439 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 440 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 441 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 442 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 443 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 444 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 445 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 446 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 447 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 448 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 449 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 450 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 451 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 452 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 453 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 454 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 455 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 456 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 457 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 458 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 459 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 460 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 461 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 462 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 463 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 464 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 465 }; 466 467 static const u32 golden_settings_iceland_a11[] = 468 { 469 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 470 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 471 mmDB_DEBUG3, 0xc0000000, 0xc0000000, 472 mmGB_GPU_ID, 0x0000000f, 0x00000000, 473 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 474 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 475 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002, 476 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 477 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 478 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 479 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 480 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 481 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 482 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1, 483 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 484 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010, 485 }; 486 487 static const u32 iceland_golden_common_all[] = 488 { 489 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 490 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 491 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 492 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 493 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 494 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 495 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 496 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF 497 }; 498 499 static const u32 iceland_mgcg_cgcg_init[] = 500 { 501 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 502 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 503 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 504 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 505 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100, 506 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100, 507 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100, 508 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 509 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 510 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 511 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 512 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 513 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 514 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 515 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 516 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 517 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 518 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 519 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 520 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 521 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 522 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 523 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100, 524 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 525 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 526 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 527 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 528 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 529 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 530 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 531 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 532 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 533 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 534 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 535 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 536 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 537 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 538 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 539 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 540 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 541 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 542 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 543 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 544 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 545 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 546 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 547 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 548 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 549 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 550 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 551 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 552 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 553 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 554 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 555 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 556 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 557 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 558 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 559 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 560 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 561 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 562 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 563 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 564 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 565 }; 566 567 static const u32 cz_golden_settings_a11[] = 568 { 569 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 570 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 571 mmGB_GPU_ID, 0x0000000f, 0x00000000, 572 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001, 573 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 574 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 575 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 576 mmTA_CNTL_AUX, 0x000f000f, 0x00010000, 577 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 578 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 579 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3, 580 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302 581 }; 582 583 static const u32 cz_golden_common_all[] = 584 { 585 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 586 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 587 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 588 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 589 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 590 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 591 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 592 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF 593 }; 594 595 static const u32 cz_mgcg_cgcg_init[] = 596 { 597 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 598 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 599 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 600 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 601 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 602 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 603 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100, 604 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 605 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 606 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 607 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 608 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 609 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 610 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 611 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 612 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 613 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 614 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 615 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 616 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 617 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 618 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 619 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 620 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 621 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 622 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 623 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 624 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 625 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 626 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 627 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 628 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 629 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 630 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 631 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 632 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 633 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 634 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 635 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 636 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 637 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 638 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 639 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 640 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 641 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 642 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 643 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 644 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 645 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 646 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 647 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 648 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 649 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 650 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 651 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 652 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 653 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 654 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 655 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 656 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 657 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 658 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 659 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 660 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 661 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 662 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 663 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 664 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 665 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 666 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 667 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 668 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 669 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 670 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 671 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 672 }; 673 674 static const u32 stoney_golden_settings_a11[] = 675 { 676 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 677 mmGB_GPU_ID, 0x0000000f, 0x00000000, 678 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 679 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 680 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 681 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 682 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 683 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 684 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1, 685 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010, 686 }; 687 688 static const u32 stoney_golden_common_all[] = 689 { 690 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 691 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000, 692 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 693 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001, 694 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 695 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 696 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 697 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 698 }; 699 700 static const u32 stoney_mgcg_cgcg_init[] = 701 { 702 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 703 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 704 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 705 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 706 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200, 707 }; 708 709 710 static const char * const sq_edc_source_names[] = { 711 "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred", 712 "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch", 713 "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return", 714 "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR", 715 "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS", 716 "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS", 717 "SQ_EDC_INFO_SOURCE_TA: EDC source is TA", 718 }; 719 720 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev); 721 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev); 722 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); 723 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); 724 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); 725 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); 726 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring); 727 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring); 728 729 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) 730 { 731 switch (adev->asic_type) { 732 case CHIP_TOPAZ: 733 amdgpu_device_program_register_sequence(adev, 734 iceland_mgcg_cgcg_init, 735 ARRAY_SIZE(iceland_mgcg_cgcg_init)); 736 amdgpu_device_program_register_sequence(adev, 737 golden_settings_iceland_a11, 738 ARRAY_SIZE(golden_settings_iceland_a11)); 739 amdgpu_device_program_register_sequence(adev, 740 iceland_golden_common_all, 741 ARRAY_SIZE(iceland_golden_common_all)); 742 break; 743 case CHIP_FIJI: 744 amdgpu_device_program_register_sequence(adev, 745 fiji_mgcg_cgcg_init, 746 ARRAY_SIZE(fiji_mgcg_cgcg_init)); 747 amdgpu_device_program_register_sequence(adev, 748 golden_settings_fiji_a10, 749 ARRAY_SIZE(golden_settings_fiji_a10)); 750 amdgpu_device_program_register_sequence(adev, 751 fiji_golden_common_all, 752 ARRAY_SIZE(fiji_golden_common_all)); 753 break; 754 755 case CHIP_TONGA: 756 amdgpu_device_program_register_sequence(adev, 757 tonga_mgcg_cgcg_init, 758 ARRAY_SIZE(tonga_mgcg_cgcg_init)); 759 amdgpu_device_program_register_sequence(adev, 760 golden_settings_tonga_a11, 761 ARRAY_SIZE(golden_settings_tonga_a11)); 762 amdgpu_device_program_register_sequence(adev, 763 tonga_golden_common_all, 764 ARRAY_SIZE(tonga_golden_common_all)); 765 break; 766 case CHIP_VEGAM: 767 amdgpu_device_program_register_sequence(adev, 768 golden_settings_vegam_a11, 769 ARRAY_SIZE(golden_settings_vegam_a11)); 770 amdgpu_device_program_register_sequence(adev, 771 vegam_golden_common_all, 772 ARRAY_SIZE(vegam_golden_common_all)); 773 break; 774 case CHIP_POLARIS11: 775 case CHIP_POLARIS12: 776 amdgpu_device_program_register_sequence(adev, 777 golden_settings_polaris11_a11, 778 ARRAY_SIZE(golden_settings_polaris11_a11)); 779 amdgpu_device_program_register_sequence(adev, 780 polaris11_golden_common_all, 781 ARRAY_SIZE(polaris11_golden_common_all)); 782 break; 783 case CHIP_POLARIS10: 784 amdgpu_device_program_register_sequence(adev, 785 golden_settings_polaris10_a11, 786 ARRAY_SIZE(golden_settings_polaris10_a11)); 787 amdgpu_device_program_register_sequence(adev, 788 polaris10_golden_common_all, 789 ARRAY_SIZE(polaris10_golden_common_all)); 790 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); 791 if (adev->pdev->revision == 0xc7 && 792 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) || 793 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) || 794 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) { 795 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD); 796 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0); 797 } 798 break; 799 case CHIP_CARRIZO: 800 amdgpu_device_program_register_sequence(adev, 801 cz_mgcg_cgcg_init, 802 ARRAY_SIZE(cz_mgcg_cgcg_init)); 803 amdgpu_device_program_register_sequence(adev, 804 cz_golden_settings_a11, 805 ARRAY_SIZE(cz_golden_settings_a11)); 806 amdgpu_device_program_register_sequence(adev, 807 cz_golden_common_all, 808 ARRAY_SIZE(cz_golden_common_all)); 809 break; 810 case CHIP_STONEY: 811 amdgpu_device_program_register_sequence(adev, 812 stoney_mgcg_cgcg_init, 813 ARRAY_SIZE(stoney_mgcg_cgcg_init)); 814 amdgpu_device_program_register_sequence(adev, 815 stoney_golden_settings_a11, 816 ARRAY_SIZE(stoney_golden_settings_a11)); 817 amdgpu_device_program_register_sequence(adev, 818 stoney_golden_common_all, 819 ARRAY_SIZE(stoney_golden_common_all)); 820 break; 821 default: 822 break; 823 } 824 } 825 826 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev) 827 { 828 adev->gfx.scratch.num_reg = 8; 829 adev->gfx.scratch.reg_base = mmSCRATCH_REG0; 830 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; 831 } 832 833 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) 834 { 835 struct amdgpu_device *adev = ring->adev; 836 uint32_t scratch; 837 uint32_t tmp = 0; 838 unsigned i; 839 int r; 840 841 r = amdgpu_gfx_scratch_get(adev, &scratch); 842 if (r) { 843 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); 844 return r; 845 } 846 WREG32(scratch, 0xCAFEDEAD); 847 r = amdgpu_ring_alloc(ring, 3); 848 if (r) { 849 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 850 ring->idx, r); 851 amdgpu_gfx_scratch_free(adev, scratch); 852 return r; 853 } 854 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 855 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 856 amdgpu_ring_write(ring, 0xDEADBEEF); 857 amdgpu_ring_commit(ring); 858 859 for (i = 0; i < adev->usec_timeout; i++) { 860 tmp = RREG32(scratch); 861 if (tmp == 0xDEADBEEF) 862 break; 863 DRM_UDELAY(1); 864 } 865 if (i < adev->usec_timeout) { 866 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", 867 ring->idx, i); 868 } else { 869 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", 870 ring->idx, scratch, tmp); 871 r = -EINVAL; 872 } 873 amdgpu_gfx_scratch_free(adev, scratch); 874 return r; 875 } 876 877 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 878 { 879 struct amdgpu_device *adev = ring->adev; 880 struct amdgpu_ib ib; 881 struct dma_fence *f = NULL; 882 883 unsigned int index; 884 uint64_t gpu_addr; 885 uint32_t tmp; 886 long r; 887 888 r = amdgpu_device_wb_get(adev, &index); 889 if (r) { 890 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); 891 return r; 892 } 893 894 gpu_addr = adev->wb.gpu_addr + (index * 4); 895 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 896 memset(&ib, 0, sizeof(ib)); 897 r = amdgpu_ib_get(adev, NULL, 16, &ib); 898 if (r) { 899 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 900 goto err1; 901 } 902 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 903 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 904 ib.ptr[2] = lower_32_bits(gpu_addr); 905 ib.ptr[3] = upper_32_bits(gpu_addr); 906 ib.ptr[4] = 0xDEADBEEF; 907 ib.length_dw = 5; 908 909 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 910 if (r) 911 goto err2; 912 913 r = dma_fence_wait_timeout(f, false, timeout); 914 if (r == 0) { 915 DRM_ERROR("amdgpu: IB test timed out.\n"); 916 r = -ETIMEDOUT; 917 goto err2; 918 } else if (r < 0) { 919 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 920 goto err2; 921 } 922 923 tmp = adev->wb.wb[index]; 924 if (tmp == 0xDEADBEEF) { 925 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); 926 r = 0; 927 } else { 928 DRM_ERROR("ib test on ring %d failed\n", ring->idx); 929 r = -EINVAL; 930 } 931 932 err2: 933 amdgpu_ib_free(adev, &ib, NULL); 934 dma_fence_put(f); 935 err1: 936 amdgpu_device_wb_free(adev, index); 937 return r; 938 } 939 940 941 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) 942 { 943 release_firmware(adev->gfx.pfp_fw); 944 adev->gfx.pfp_fw = NULL; 945 release_firmware(adev->gfx.me_fw); 946 adev->gfx.me_fw = NULL; 947 release_firmware(adev->gfx.ce_fw); 948 adev->gfx.ce_fw = NULL; 949 release_firmware(adev->gfx.rlc_fw); 950 adev->gfx.rlc_fw = NULL; 951 release_firmware(adev->gfx.mec_fw); 952 adev->gfx.mec_fw = NULL; 953 if ((adev->asic_type != CHIP_STONEY) && 954 (adev->asic_type != CHIP_TOPAZ)) 955 release_firmware(adev->gfx.mec2_fw); 956 adev->gfx.mec2_fw = NULL; 957 958 kfree(adev->gfx.rlc.register_list_format); 959 } 960 961 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) 962 { 963 const char *chip_name; 964 char fw_name[30]; 965 int err; 966 struct amdgpu_firmware_info *info = NULL; 967 const struct common_firmware_header *header = NULL; 968 const struct gfx_firmware_header_v1_0 *cp_hdr; 969 const struct rlc_firmware_header_v2_0 *rlc_hdr; 970 unsigned int *tmp = NULL, i; 971 972 DRM_DEBUG("\n"); 973 974 switch (adev->asic_type) { 975 case CHIP_TOPAZ: 976 chip_name = "topaz"; 977 break; 978 case CHIP_TONGA: 979 chip_name = "tonga"; 980 break; 981 case CHIP_CARRIZO: 982 chip_name = "carrizo"; 983 break; 984 case CHIP_FIJI: 985 chip_name = "fiji"; 986 break; 987 case CHIP_STONEY: 988 chip_name = "stoney"; 989 break; 990 case CHIP_POLARIS10: 991 chip_name = "polaris10"; 992 break; 993 case CHIP_POLARIS11: 994 chip_name = "polaris11"; 995 break; 996 case CHIP_POLARIS12: 997 chip_name = "polaris12"; 998 break; 999 case CHIP_VEGAM: 1000 chip_name = "vegam"; 1001 break; 1002 default: 1003 BUG(); 1004 } 1005 1006 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1007 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name); 1008 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1009 if (err == -ENOENT) { 1010 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 1011 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1012 } 1013 } else { 1014 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 1015 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1016 } 1017 if (err) 1018 goto out; 1019 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 1020 if (err) 1021 goto out; 1022 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1023 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1024 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1025 1026 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1027 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name); 1028 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1029 if (err == -ENOENT) { 1030 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 1031 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1032 } 1033 } else { 1034 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 1035 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1036 } 1037 if (err) 1038 goto out; 1039 err = amdgpu_ucode_validate(adev->gfx.me_fw); 1040 if (err) 1041 goto out; 1042 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1043 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1044 1045 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1046 1047 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1048 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name); 1049 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1050 if (err == -ENOENT) { 1051 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 1052 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1053 } 1054 } else { 1055 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 1056 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1057 } 1058 if (err) 1059 goto out; 1060 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 1061 if (err) 1062 goto out; 1063 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1064 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1065 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1066 1067 /* 1068 * Support for MCBP/Virtualization in combination with chained IBs is 1069 * formal released on feature version #46 1070 */ 1071 if (adev->gfx.ce_feature_version >= 46 && 1072 adev->gfx.pfp_feature_version >= 46) { 1073 adev->virt.chained_ib_support = true; 1074 DRM_INFO("Chained IB support enabled!\n"); 1075 } else 1076 adev->virt.chained_ib_support = false; 1077 1078 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 1079 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 1080 if (err) 1081 goto out; 1082 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 1083 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1084 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 1085 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 1086 1087 adev->gfx.rlc.save_and_restore_offset = 1088 le32_to_cpu(rlc_hdr->save_and_restore_offset); 1089 adev->gfx.rlc.clear_state_descriptor_offset = 1090 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 1091 adev->gfx.rlc.avail_scratch_ram_locations = 1092 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 1093 adev->gfx.rlc.reg_restore_list_size = 1094 le32_to_cpu(rlc_hdr->reg_restore_list_size); 1095 adev->gfx.rlc.reg_list_format_start = 1096 le32_to_cpu(rlc_hdr->reg_list_format_start); 1097 adev->gfx.rlc.reg_list_format_separate_start = 1098 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 1099 adev->gfx.rlc.starting_offsets_start = 1100 le32_to_cpu(rlc_hdr->starting_offsets_start); 1101 adev->gfx.rlc.reg_list_format_size_bytes = 1102 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 1103 adev->gfx.rlc.reg_list_size_bytes = 1104 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 1105 1106 adev->gfx.rlc.register_list_format = 1107 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 1108 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 1109 1110 if (!adev->gfx.rlc.register_list_format) { 1111 err = -ENOMEM; 1112 goto out; 1113 } 1114 1115 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1116 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 1117 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) 1118 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 1119 1120 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 1121 1122 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1123 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 1124 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) 1125 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 1126 1127 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1128 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name); 1129 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1130 if (err == -ENOENT) { 1131 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1132 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1133 } 1134 } else { 1135 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1136 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1137 } 1138 if (err) 1139 goto out; 1140 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 1141 if (err) 1142 goto out; 1143 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1144 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1145 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1146 1147 if ((adev->asic_type != CHIP_STONEY) && 1148 (adev->asic_type != CHIP_TOPAZ)) { 1149 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1150 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name); 1151 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1152 if (err == -ENOENT) { 1153 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1154 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1155 } 1156 } else { 1157 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1158 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1159 } 1160 if (!err) { 1161 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1162 if (err) 1163 goto out; 1164 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1165 adev->gfx.mec2_fw->data; 1166 adev->gfx.mec2_fw_version = 1167 le32_to_cpu(cp_hdr->header.ucode_version); 1168 adev->gfx.mec2_feature_version = 1169 le32_to_cpu(cp_hdr->ucode_feature_version); 1170 } else { 1171 err = 0; 1172 adev->gfx.mec2_fw = NULL; 1173 } 1174 } 1175 1176 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { 1177 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 1178 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 1179 info->fw = adev->gfx.pfp_fw; 1180 header = (const struct common_firmware_header *)info->fw->data; 1181 adev->firmware.fw_size += 1182 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1183 1184 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1185 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1186 info->fw = adev->gfx.me_fw; 1187 header = (const struct common_firmware_header *)info->fw->data; 1188 adev->firmware.fw_size += 1189 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1190 1191 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1192 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1193 info->fw = adev->gfx.ce_fw; 1194 header = (const struct common_firmware_header *)info->fw->data; 1195 adev->firmware.fw_size += 1196 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1197 1198 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1199 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1200 info->fw = adev->gfx.rlc_fw; 1201 header = (const struct common_firmware_header *)info->fw->data; 1202 adev->firmware.fw_size += 1203 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1204 1205 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1206 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1207 info->fw = adev->gfx.mec_fw; 1208 header = (const struct common_firmware_header *)info->fw->data; 1209 adev->firmware.fw_size += 1210 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1211 1212 /* we need account JT in */ 1213 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1214 adev->firmware.fw_size += 1215 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE); 1216 1217 if (amdgpu_sriov_vf(adev)) { 1218 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE]; 1219 info->ucode_id = AMDGPU_UCODE_ID_STORAGE; 1220 info->fw = adev->gfx.mec_fw; 1221 adev->firmware.fw_size += 1222 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE); 1223 } 1224 1225 if (adev->gfx.mec2_fw) { 1226 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1227 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1228 info->fw = adev->gfx.mec2_fw; 1229 header = (const struct common_firmware_header *)info->fw->data; 1230 adev->firmware.fw_size += 1231 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1232 } 1233 1234 } 1235 1236 out: 1237 if (err) { 1238 dev_err(adev->dev, 1239 "gfx8: Failed to load firmware \"%s\"\n", 1240 fw_name); 1241 release_firmware(adev->gfx.pfp_fw); 1242 adev->gfx.pfp_fw = NULL; 1243 release_firmware(adev->gfx.me_fw); 1244 adev->gfx.me_fw = NULL; 1245 release_firmware(adev->gfx.ce_fw); 1246 adev->gfx.ce_fw = NULL; 1247 release_firmware(adev->gfx.rlc_fw); 1248 adev->gfx.rlc_fw = NULL; 1249 release_firmware(adev->gfx.mec_fw); 1250 adev->gfx.mec_fw = NULL; 1251 release_firmware(adev->gfx.mec2_fw); 1252 adev->gfx.mec2_fw = NULL; 1253 } 1254 return err; 1255 } 1256 1257 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, 1258 volatile u32 *buffer) 1259 { 1260 u32 count = 0, i; 1261 const struct cs_section_def *sect = NULL; 1262 const struct cs_extent_def *ext = NULL; 1263 1264 if (adev->gfx.rlc.cs_data == NULL) 1265 return; 1266 if (buffer == NULL) 1267 return; 1268 1269 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1270 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1271 1272 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1273 buffer[count++] = cpu_to_le32(0x80000000); 1274 buffer[count++] = cpu_to_le32(0x80000000); 1275 1276 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1277 for (ext = sect->section; ext->extent != NULL; ++ext) { 1278 if (sect->id == SECT_CONTEXT) { 1279 buffer[count++] = 1280 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1281 buffer[count++] = cpu_to_le32(ext->reg_index - 1282 PACKET3_SET_CONTEXT_REG_START); 1283 for (i = 0; i < ext->reg_count; i++) 1284 buffer[count++] = cpu_to_le32(ext->extent[i]); 1285 } else { 1286 return; 1287 } 1288 } 1289 } 1290 1291 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 1292 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - 1293 PACKET3_SET_CONTEXT_REG_START); 1294 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); 1295 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1); 1296 1297 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1298 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1299 1300 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1301 buffer[count++] = cpu_to_le32(0); 1302 } 1303 1304 static void cz_init_cp_jump_table(struct amdgpu_device *adev) 1305 { 1306 const __le32 *fw_data; 1307 volatile u32 *dst_ptr; 1308 int me, i, max_me = 4; 1309 u32 bo_offset = 0; 1310 u32 table_offset, table_size; 1311 1312 if (adev->asic_type == CHIP_CARRIZO) 1313 max_me = 5; 1314 1315 /* write the cp table buffer */ 1316 dst_ptr = adev->gfx.rlc.cp_table_ptr; 1317 for (me = 0; me < max_me; me++) { 1318 if (me == 0) { 1319 const struct gfx_firmware_header_v1_0 *hdr = 1320 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1321 fw_data = (const __le32 *) 1322 (adev->gfx.ce_fw->data + 1323 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1324 table_offset = le32_to_cpu(hdr->jt_offset); 1325 table_size = le32_to_cpu(hdr->jt_size); 1326 } else if (me == 1) { 1327 const struct gfx_firmware_header_v1_0 *hdr = 1328 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1329 fw_data = (const __le32 *) 1330 (adev->gfx.pfp_fw->data + 1331 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1332 table_offset = le32_to_cpu(hdr->jt_offset); 1333 table_size = le32_to_cpu(hdr->jt_size); 1334 } else if (me == 2) { 1335 const struct gfx_firmware_header_v1_0 *hdr = 1336 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1337 fw_data = (const __le32 *) 1338 (adev->gfx.me_fw->data + 1339 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1340 table_offset = le32_to_cpu(hdr->jt_offset); 1341 table_size = le32_to_cpu(hdr->jt_size); 1342 } else if (me == 3) { 1343 const struct gfx_firmware_header_v1_0 *hdr = 1344 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1345 fw_data = (const __le32 *) 1346 (adev->gfx.mec_fw->data + 1347 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1348 table_offset = le32_to_cpu(hdr->jt_offset); 1349 table_size = le32_to_cpu(hdr->jt_size); 1350 } else if (me == 4) { 1351 const struct gfx_firmware_header_v1_0 *hdr = 1352 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 1353 fw_data = (const __le32 *) 1354 (adev->gfx.mec2_fw->data + 1355 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1356 table_offset = le32_to_cpu(hdr->jt_offset); 1357 table_size = le32_to_cpu(hdr->jt_size); 1358 } 1359 1360 for (i = 0; i < table_size; i ++) { 1361 dst_ptr[bo_offset + i] = 1362 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); 1363 } 1364 1365 bo_offset += table_size; 1366 } 1367 } 1368 1369 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) 1370 { 1371 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); 1372 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); 1373 } 1374 1375 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) 1376 { 1377 volatile u32 *dst_ptr; 1378 u32 dws; 1379 const struct cs_section_def *cs_data; 1380 int r; 1381 1382 adev->gfx.rlc.cs_data = vi_cs_data; 1383 1384 cs_data = adev->gfx.rlc.cs_data; 1385 1386 if (cs_data) { 1387 /* clear state block */ 1388 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); 1389 1390 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, 1391 AMDGPU_GEM_DOMAIN_VRAM, 1392 &adev->gfx.rlc.clear_state_obj, 1393 &adev->gfx.rlc.clear_state_gpu_addr, 1394 (void **)&adev->gfx.rlc.cs_ptr); 1395 if (r) { 1396 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); 1397 gfx_v8_0_rlc_fini(adev); 1398 return r; 1399 } 1400 1401 /* set up the cs buffer */ 1402 dst_ptr = adev->gfx.rlc.cs_ptr; 1403 gfx_v8_0_get_csb_buffer(adev, dst_ptr); 1404 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); 1405 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1406 } 1407 1408 if ((adev->asic_type == CHIP_CARRIZO) || 1409 (adev->asic_type == CHIP_STONEY)) { 1410 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1411 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, 1412 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 1413 &adev->gfx.rlc.cp_table_obj, 1414 &adev->gfx.rlc.cp_table_gpu_addr, 1415 (void **)&adev->gfx.rlc.cp_table_ptr); 1416 if (r) { 1417 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); 1418 return r; 1419 } 1420 1421 cz_init_cp_jump_table(adev); 1422 1423 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); 1424 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); 1425 } 1426 1427 return 0; 1428 } 1429 1430 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) 1431 { 1432 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1433 } 1434 1435 static int gfx_v8_0_mec_init(struct amdgpu_device *adev) 1436 { 1437 int r; 1438 u32 *hpd; 1439 size_t mec_hpd_size; 1440 1441 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1442 1443 /* take ownership of the relevant compute queues */ 1444 amdgpu_gfx_compute_queue_acquire(adev); 1445 1446 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; 1447 1448 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1449 AMDGPU_GEM_DOMAIN_GTT, 1450 &adev->gfx.mec.hpd_eop_obj, 1451 &adev->gfx.mec.hpd_eop_gpu_addr, 1452 (void **)&hpd); 1453 if (r) { 1454 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1455 return r; 1456 } 1457 1458 memset(hpd, 0, mec_hpd_size); 1459 1460 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1461 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1462 1463 return 0; 1464 } 1465 1466 static const u32 vgpr_init_compute_shader[] = 1467 { 1468 0x7e000209, 0x7e020208, 1469 0x7e040207, 0x7e060206, 1470 0x7e080205, 0x7e0a0204, 1471 0x7e0c0203, 0x7e0e0202, 1472 0x7e100201, 0x7e120200, 1473 0x7e140209, 0x7e160208, 1474 0x7e180207, 0x7e1a0206, 1475 0x7e1c0205, 0x7e1e0204, 1476 0x7e200203, 0x7e220202, 1477 0x7e240201, 0x7e260200, 1478 0x7e280209, 0x7e2a0208, 1479 0x7e2c0207, 0x7e2e0206, 1480 0x7e300205, 0x7e320204, 1481 0x7e340203, 0x7e360202, 1482 0x7e380201, 0x7e3a0200, 1483 0x7e3c0209, 0x7e3e0208, 1484 0x7e400207, 0x7e420206, 1485 0x7e440205, 0x7e460204, 1486 0x7e480203, 0x7e4a0202, 1487 0x7e4c0201, 0x7e4e0200, 1488 0x7e500209, 0x7e520208, 1489 0x7e540207, 0x7e560206, 1490 0x7e580205, 0x7e5a0204, 1491 0x7e5c0203, 0x7e5e0202, 1492 0x7e600201, 0x7e620200, 1493 0x7e640209, 0x7e660208, 1494 0x7e680207, 0x7e6a0206, 1495 0x7e6c0205, 0x7e6e0204, 1496 0x7e700203, 0x7e720202, 1497 0x7e740201, 0x7e760200, 1498 0x7e780209, 0x7e7a0208, 1499 0x7e7c0207, 0x7e7e0206, 1500 0xbf8a0000, 0xbf810000, 1501 }; 1502 1503 static const u32 sgpr_init_compute_shader[] = 1504 { 1505 0xbe8a0100, 0xbe8c0102, 1506 0xbe8e0104, 0xbe900106, 1507 0xbe920108, 0xbe940100, 1508 0xbe960102, 0xbe980104, 1509 0xbe9a0106, 0xbe9c0108, 1510 0xbe9e0100, 0xbea00102, 1511 0xbea20104, 0xbea40106, 1512 0xbea60108, 0xbea80100, 1513 0xbeaa0102, 0xbeac0104, 1514 0xbeae0106, 0xbeb00108, 1515 0xbeb20100, 0xbeb40102, 1516 0xbeb60104, 0xbeb80106, 1517 0xbeba0108, 0xbebc0100, 1518 0xbebe0102, 0xbec00104, 1519 0xbec20106, 0xbec40108, 1520 0xbec60100, 0xbec80102, 1521 0xbee60004, 0xbee70005, 1522 0xbeea0006, 0xbeeb0007, 1523 0xbee80008, 0xbee90009, 1524 0xbefc0000, 0xbf8a0000, 1525 0xbf810000, 0x00000000, 1526 }; 1527 1528 static const u32 vgpr_init_regs[] = 1529 { 1530 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff, 1531 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */ 1532 mmCOMPUTE_NUM_THREAD_X, 256*4, 1533 mmCOMPUTE_NUM_THREAD_Y, 1, 1534 mmCOMPUTE_NUM_THREAD_Z, 1, 1535 mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */ 1536 mmCOMPUTE_PGM_RSRC2, 20, 1537 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1538 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1539 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1540 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1541 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1542 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1543 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1544 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1545 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1546 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1547 }; 1548 1549 static const u32 sgpr1_init_regs[] = 1550 { 1551 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f, 1552 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */ 1553 mmCOMPUTE_NUM_THREAD_X, 256*5, 1554 mmCOMPUTE_NUM_THREAD_Y, 1, 1555 mmCOMPUTE_NUM_THREAD_Z, 1, 1556 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */ 1557 mmCOMPUTE_PGM_RSRC2, 20, 1558 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1559 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1560 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1561 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1562 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1563 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1564 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1565 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1566 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1567 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1568 }; 1569 1570 static const u32 sgpr2_init_regs[] = 1571 { 1572 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0, 1573 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, 1574 mmCOMPUTE_NUM_THREAD_X, 256*5, 1575 mmCOMPUTE_NUM_THREAD_Y, 1, 1576 mmCOMPUTE_NUM_THREAD_Z, 1, 1577 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */ 1578 mmCOMPUTE_PGM_RSRC2, 20, 1579 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1580 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1581 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1582 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1583 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1584 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1585 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1586 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1587 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1588 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1589 }; 1590 1591 static const u32 sec_ded_counter_registers[] = 1592 { 1593 mmCPC_EDC_ATC_CNT, 1594 mmCPC_EDC_SCRATCH_CNT, 1595 mmCPC_EDC_UCODE_CNT, 1596 mmCPF_EDC_ATC_CNT, 1597 mmCPF_EDC_ROQ_CNT, 1598 mmCPF_EDC_TAG_CNT, 1599 mmCPG_EDC_ATC_CNT, 1600 mmCPG_EDC_DMA_CNT, 1601 mmCPG_EDC_TAG_CNT, 1602 mmDC_EDC_CSINVOC_CNT, 1603 mmDC_EDC_RESTORE_CNT, 1604 mmDC_EDC_STATE_CNT, 1605 mmGDS_EDC_CNT, 1606 mmGDS_EDC_GRBM_CNT, 1607 mmGDS_EDC_OA_DED, 1608 mmSPI_EDC_CNT, 1609 mmSQC_ATC_EDC_GATCL1_CNT, 1610 mmSQC_EDC_CNT, 1611 mmSQ_EDC_DED_CNT, 1612 mmSQ_EDC_INFO, 1613 mmSQ_EDC_SEC_CNT, 1614 mmTCC_EDC_CNT, 1615 mmTCP_ATC_EDC_GATCL1_CNT, 1616 mmTCP_EDC_CNT, 1617 mmTD_EDC_CNT 1618 }; 1619 1620 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 1621 { 1622 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 1623 struct amdgpu_ib ib; 1624 struct dma_fence *f = NULL; 1625 int r, i; 1626 u32 tmp; 1627 unsigned total_size, vgpr_offset, sgpr_offset; 1628 u64 gpu_addr; 1629 1630 /* only supported on CZ */ 1631 if (adev->asic_type != CHIP_CARRIZO) 1632 return 0; 1633 1634 /* bail if the compute ring is not ready */ 1635 if (!ring->ready) 1636 return 0; 1637 1638 tmp = RREG32(mmGB_EDC_MODE); 1639 WREG32(mmGB_EDC_MODE, 0); 1640 1641 total_size = 1642 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1643 total_size += 1644 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1645 total_size += 1646 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1647 total_size = ALIGN(total_size, 256); 1648 vgpr_offset = total_size; 1649 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 1650 sgpr_offset = total_size; 1651 total_size += sizeof(sgpr_init_compute_shader); 1652 1653 /* allocate an indirect buffer to put the commands in */ 1654 memset(&ib, 0, sizeof(ib)); 1655 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 1656 if (r) { 1657 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 1658 return r; 1659 } 1660 1661 /* load the compute shaders */ 1662 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 1663 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 1664 1665 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 1666 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 1667 1668 /* init the ib length to 0 */ 1669 ib.length_dw = 0; 1670 1671 /* VGPR */ 1672 /* write the register state for the compute dispatch */ 1673 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) { 1674 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1675 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START; 1676 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1]; 1677 } 1678 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1679 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 1680 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1681 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1682 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1683 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1684 1685 /* write dispatch packet */ 1686 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1687 ib.ptr[ib.length_dw++] = 8; /* x */ 1688 ib.ptr[ib.length_dw++] = 1; /* y */ 1689 ib.ptr[ib.length_dw++] = 1; /* z */ 1690 ib.ptr[ib.length_dw++] = 1691 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1692 1693 /* write CS partial flush packet */ 1694 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1695 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1696 1697 /* SGPR1 */ 1698 /* write the register state for the compute dispatch */ 1699 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) { 1700 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1701 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START; 1702 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1]; 1703 } 1704 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1705 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1706 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1707 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1708 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1709 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1710 1711 /* write dispatch packet */ 1712 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1713 ib.ptr[ib.length_dw++] = 8; /* x */ 1714 ib.ptr[ib.length_dw++] = 1; /* y */ 1715 ib.ptr[ib.length_dw++] = 1; /* z */ 1716 ib.ptr[ib.length_dw++] = 1717 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1718 1719 /* write CS partial flush packet */ 1720 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1721 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1722 1723 /* SGPR2 */ 1724 /* write the register state for the compute dispatch */ 1725 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) { 1726 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1727 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START; 1728 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1]; 1729 } 1730 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1731 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1732 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1733 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1734 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1735 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1736 1737 /* write dispatch packet */ 1738 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1739 ib.ptr[ib.length_dw++] = 8; /* x */ 1740 ib.ptr[ib.length_dw++] = 1; /* y */ 1741 ib.ptr[ib.length_dw++] = 1; /* z */ 1742 ib.ptr[ib.length_dw++] = 1743 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1744 1745 /* write CS partial flush packet */ 1746 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1747 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1748 1749 /* shedule the ib on the ring */ 1750 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 1751 if (r) { 1752 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 1753 goto fail; 1754 } 1755 1756 /* wait for the GPU to finish processing the IB */ 1757 r = dma_fence_wait(f, false); 1758 if (r) { 1759 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 1760 goto fail; 1761 } 1762 1763 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2); 1764 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1); 1765 WREG32(mmGB_EDC_MODE, tmp); 1766 1767 tmp = RREG32(mmCC_GC_EDC_CONFIG); 1768 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1; 1769 WREG32(mmCC_GC_EDC_CONFIG, tmp); 1770 1771 1772 /* read back registers to clear the counters */ 1773 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) 1774 RREG32(sec_ded_counter_registers[i]); 1775 1776 fail: 1777 amdgpu_ib_free(adev, &ib, NULL); 1778 dma_fence_put(f); 1779 1780 return r; 1781 } 1782 1783 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) 1784 { 1785 u32 gb_addr_config; 1786 u32 mc_shared_chmap, mc_arb_ramcfg; 1787 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; 1788 u32 tmp; 1789 int ret; 1790 1791 switch (adev->asic_type) { 1792 case CHIP_TOPAZ: 1793 adev->gfx.config.max_shader_engines = 1; 1794 adev->gfx.config.max_tile_pipes = 2; 1795 adev->gfx.config.max_cu_per_sh = 6; 1796 adev->gfx.config.max_sh_per_se = 1; 1797 adev->gfx.config.max_backends_per_se = 2; 1798 adev->gfx.config.max_texture_channel_caches = 2; 1799 adev->gfx.config.max_gprs = 256; 1800 adev->gfx.config.max_gs_threads = 32; 1801 adev->gfx.config.max_hw_contexts = 8; 1802 1803 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1804 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1805 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1806 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1807 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN; 1808 break; 1809 case CHIP_FIJI: 1810 adev->gfx.config.max_shader_engines = 4; 1811 adev->gfx.config.max_tile_pipes = 16; 1812 adev->gfx.config.max_cu_per_sh = 16; 1813 adev->gfx.config.max_sh_per_se = 1; 1814 adev->gfx.config.max_backends_per_se = 4; 1815 adev->gfx.config.max_texture_channel_caches = 16; 1816 adev->gfx.config.max_gprs = 256; 1817 adev->gfx.config.max_gs_threads = 32; 1818 adev->gfx.config.max_hw_contexts = 8; 1819 1820 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1821 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1822 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1823 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1824 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1825 break; 1826 case CHIP_POLARIS11: 1827 case CHIP_POLARIS12: 1828 ret = amdgpu_atombios_get_gfx_info(adev); 1829 if (ret) 1830 return ret; 1831 adev->gfx.config.max_gprs = 256; 1832 adev->gfx.config.max_gs_threads = 32; 1833 adev->gfx.config.max_hw_contexts = 8; 1834 1835 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1836 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1837 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1838 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1839 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; 1840 break; 1841 case CHIP_POLARIS10: 1842 case CHIP_VEGAM: 1843 ret = amdgpu_atombios_get_gfx_info(adev); 1844 if (ret) 1845 return ret; 1846 adev->gfx.config.max_gprs = 256; 1847 adev->gfx.config.max_gs_threads = 32; 1848 adev->gfx.config.max_hw_contexts = 8; 1849 1850 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1851 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1852 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1853 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1854 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1855 break; 1856 case CHIP_TONGA: 1857 adev->gfx.config.max_shader_engines = 4; 1858 adev->gfx.config.max_tile_pipes = 8; 1859 adev->gfx.config.max_cu_per_sh = 8; 1860 adev->gfx.config.max_sh_per_se = 1; 1861 adev->gfx.config.max_backends_per_se = 2; 1862 adev->gfx.config.max_texture_channel_caches = 8; 1863 adev->gfx.config.max_gprs = 256; 1864 adev->gfx.config.max_gs_threads = 32; 1865 adev->gfx.config.max_hw_contexts = 8; 1866 1867 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1868 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1869 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1870 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1871 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1872 break; 1873 case CHIP_CARRIZO: 1874 adev->gfx.config.max_shader_engines = 1; 1875 adev->gfx.config.max_tile_pipes = 2; 1876 adev->gfx.config.max_sh_per_se = 1; 1877 adev->gfx.config.max_backends_per_se = 2; 1878 adev->gfx.config.max_cu_per_sh = 8; 1879 adev->gfx.config.max_texture_channel_caches = 2; 1880 adev->gfx.config.max_gprs = 256; 1881 adev->gfx.config.max_gs_threads = 32; 1882 adev->gfx.config.max_hw_contexts = 8; 1883 1884 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1885 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1886 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1887 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1888 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1889 break; 1890 case CHIP_STONEY: 1891 adev->gfx.config.max_shader_engines = 1; 1892 adev->gfx.config.max_tile_pipes = 2; 1893 adev->gfx.config.max_sh_per_se = 1; 1894 adev->gfx.config.max_backends_per_se = 1; 1895 adev->gfx.config.max_cu_per_sh = 3; 1896 adev->gfx.config.max_texture_channel_caches = 2; 1897 adev->gfx.config.max_gprs = 256; 1898 adev->gfx.config.max_gs_threads = 16; 1899 adev->gfx.config.max_hw_contexts = 8; 1900 1901 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1902 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1903 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1904 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1905 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1906 break; 1907 default: 1908 adev->gfx.config.max_shader_engines = 2; 1909 adev->gfx.config.max_tile_pipes = 4; 1910 adev->gfx.config.max_cu_per_sh = 2; 1911 adev->gfx.config.max_sh_per_se = 1; 1912 adev->gfx.config.max_backends_per_se = 2; 1913 adev->gfx.config.max_texture_channel_caches = 4; 1914 adev->gfx.config.max_gprs = 256; 1915 adev->gfx.config.max_gs_threads = 32; 1916 adev->gfx.config.max_hw_contexts = 8; 1917 1918 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1919 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1920 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1921 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1922 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1923 break; 1924 } 1925 1926 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); 1927 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); 1928 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; 1929 1930 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; 1931 adev->gfx.config.mem_max_burst_length_bytes = 256; 1932 if (adev->flags & AMD_IS_APU) { 1933 /* Get memory bank mapping mode. */ 1934 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING); 1935 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1936 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1937 1938 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING); 1939 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1940 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1941 1942 /* Validate settings in case only one DIMM installed. */ 1943 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12)) 1944 dimm00_addr_map = 0; 1945 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12)) 1946 dimm01_addr_map = 0; 1947 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12)) 1948 dimm10_addr_map = 0; 1949 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12)) 1950 dimm11_addr_map = 0; 1951 1952 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */ 1953 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */ 1954 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) 1955 adev->gfx.config.mem_row_size_in_kb = 2; 1956 else 1957 adev->gfx.config.mem_row_size_in_kb = 1; 1958 } else { 1959 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS); 1960 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 1961 if (adev->gfx.config.mem_row_size_in_kb > 4) 1962 adev->gfx.config.mem_row_size_in_kb = 4; 1963 } 1964 1965 adev->gfx.config.shader_engine_tile_size = 32; 1966 adev->gfx.config.num_gpus = 1; 1967 adev->gfx.config.multi_gpu_tile_size = 64; 1968 1969 /* fix up row size */ 1970 switch (adev->gfx.config.mem_row_size_in_kb) { 1971 case 1: 1972 default: 1973 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0); 1974 break; 1975 case 2: 1976 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1); 1977 break; 1978 case 4: 1979 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2); 1980 break; 1981 } 1982 adev->gfx.config.gb_addr_config = gb_addr_config; 1983 1984 return 0; 1985 } 1986 1987 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1988 int mec, int pipe, int queue) 1989 { 1990 int r; 1991 unsigned irq_type; 1992 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 1993 1994 ring = &adev->gfx.compute_ring[ring_id]; 1995 1996 /* mec0 is me1 */ 1997 ring->me = mec + 1; 1998 ring->pipe = pipe; 1999 ring->queue = queue; 2000 2001 ring->ring_obj = NULL; 2002 ring->use_doorbell = true; 2003 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; 2004 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2005 + (ring_id * GFX8_MEC_HPD_SIZE); 2006 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2007 2008 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2009 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2010 + ring->pipe; 2011 2012 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2013 r = amdgpu_ring_init(adev, ring, 1024, 2014 &adev->gfx.eop_irq, irq_type); 2015 if (r) 2016 return r; 2017 2018 2019 return 0; 2020 } 2021 2022 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work); 2023 2024 static int gfx_v8_0_sw_init(void *handle) 2025 { 2026 int i, j, k, r, ring_id; 2027 struct amdgpu_ring *ring; 2028 struct amdgpu_kiq *kiq; 2029 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2030 2031 switch (adev->asic_type) { 2032 case CHIP_TONGA: 2033 case CHIP_CARRIZO: 2034 case CHIP_FIJI: 2035 case CHIP_POLARIS10: 2036 case CHIP_POLARIS11: 2037 case CHIP_POLARIS12: 2038 case CHIP_VEGAM: 2039 adev->gfx.mec.num_mec = 2; 2040 break; 2041 case CHIP_TOPAZ: 2042 case CHIP_STONEY: 2043 default: 2044 adev->gfx.mec.num_mec = 1; 2045 break; 2046 } 2047 2048 adev->gfx.mec.num_pipe_per_mec = 4; 2049 adev->gfx.mec.num_queue_per_pipe = 8; 2050 2051 /* KIQ event */ 2052 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq); 2053 if (r) 2054 return r; 2055 2056 /* EOP Event */ 2057 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq); 2058 if (r) 2059 return r; 2060 2061 /* Privileged reg */ 2062 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT, 2063 &adev->gfx.priv_reg_irq); 2064 if (r) 2065 return r; 2066 2067 /* Privileged inst */ 2068 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT, 2069 &adev->gfx.priv_inst_irq); 2070 if (r) 2071 return r; 2072 2073 /* Add CP EDC/ECC irq */ 2074 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR, 2075 &adev->gfx.cp_ecc_error_irq); 2076 if (r) 2077 return r; 2078 2079 /* SQ interrupts. */ 2080 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG, 2081 &adev->gfx.sq_irq); 2082 if (r) { 2083 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r); 2084 return r; 2085 } 2086 2087 INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func); 2088 2089 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2090 2091 gfx_v8_0_scratch_init(adev); 2092 2093 r = gfx_v8_0_init_microcode(adev); 2094 if (r) { 2095 DRM_ERROR("Failed to load gfx firmware!\n"); 2096 return r; 2097 } 2098 2099 r = gfx_v8_0_rlc_init(adev); 2100 if (r) { 2101 DRM_ERROR("Failed to init rlc BOs!\n"); 2102 return r; 2103 } 2104 2105 r = gfx_v8_0_mec_init(adev); 2106 if (r) { 2107 DRM_ERROR("Failed to init MEC BOs!\n"); 2108 return r; 2109 } 2110 2111 /* set up the gfx ring */ 2112 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2113 ring = &adev->gfx.gfx_ring[i]; 2114 ring->ring_obj = NULL; 2115 sprintf(ring->name, "gfx"); 2116 /* no gfx doorbells on iceland */ 2117 if (adev->asic_type != CHIP_TOPAZ) { 2118 ring->use_doorbell = true; 2119 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; 2120 } 2121 2122 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2123 AMDGPU_CP_IRQ_GFX_EOP); 2124 if (r) 2125 return r; 2126 } 2127 2128 2129 /* set up the compute queues - allocate horizontally across pipes */ 2130 ring_id = 0; 2131 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2132 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2133 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2134 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2135 continue; 2136 2137 r = gfx_v8_0_compute_ring_init(adev, 2138 ring_id, 2139 i, k, j); 2140 if (r) 2141 return r; 2142 2143 ring_id++; 2144 } 2145 } 2146 } 2147 2148 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE); 2149 if (r) { 2150 DRM_ERROR("Failed to init KIQ BOs!\n"); 2151 return r; 2152 } 2153 2154 kiq = &adev->gfx.kiq; 2155 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2156 if (r) 2157 return r; 2158 2159 /* create MQD for all compute queues as well as KIQ for SRIOV case */ 2160 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); 2161 if (r) 2162 return r; 2163 2164 adev->gfx.ce_ram_size = 0x8000; 2165 2166 r = gfx_v8_0_gpu_early_init(adev); 2167 if (r) 2168 return r; 2169 2170 return 0; 2171 } 2172 2173 static int gfx_v8_0_sw_fini(void *handle) 2174 { 2175 int i; 2176 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2177 2178 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); 2179 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); 2180 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); 2181 2182 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2183 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2184 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2185 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2186 2187 amdgpu_gfx_compute_mqd_sw_fini(adev); 2188 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 2189 amdgpu_gfx_kiq_fini(adev); 2190 2191 gfx_v8_0_mec_fini(adev); 2192 gfx_v8_0_rlc_fini(adev); 2193 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 2194 &adev->gfx.rlc.clear_state_gpu_addr, 2195 (void **)&adev->gfx.rlc.cs_ptr); 2196 if ((adev->asic_type == CHIP_CARRIZO) || 2197 (adev->asic_type == CHIP_STONEY)) { 2198 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2199 &adev->gfx.rlc.cp_table_gpu_addr, 2200 (void **)&adev->gfx.rlc.cp_table_ptr); 2201 } 2202 gfx_v8_0_free_microcode(adev); 2203 2204 return 0; 2205 } 2206 2207 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) 2208 { 2209 uint32_t *modearray, *mod2array; 2210 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); 2211 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 2212 u32 reg_offset; 2213 2214 modearray = adev->gfx.config.tile_mode_array; 2215 mod2array = adev->gfx.config.macrotile_mode_array; 2216 2217 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2218 modearray[reg_offset] = 0; 2219 2220 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2221 mod2array[reg_offset] = 0; 2222 2223 switch (adev->asic_type) { 2224 case CHIP_TOPAZ: 2225 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2226 PIPE_CONFIG(ADDR_SURF_P2) | 2227 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2228 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2229 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2230 PIPE_CONFIG(ADDR_SURF_P2) | 2231 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2232 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2233 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2234 PIPE_CONFIG(ADDR_SURF_P2) | 2235 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2236 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2237 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2238 PIPE_CONFIG(ADDR_SURF_P2) | 2239 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2240 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2241 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2242 PIPE_CONFIG(ADDR_SURF_P2) | 2243 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2244 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2245 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2246 PIPE_CONFIG(ADDR_SURF_P2) | 2247 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2248 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2249 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2250 PIPE_CONFIG(ADDR_SURF_P2) | 2251 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2252 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2253 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2254 PIPE_CONFIG(ADDR_SURF_P2)); 2255 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2256 PIPE_CONFIG(ADDR_SURF_P2) | 2257 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2259 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2260 PIPE_CONFIG(ADDR_SURF_P2) | 2261 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2263 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2264 PIPE_CONFIG(ADDR_SURF_P2) | 2265 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2267 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2268 PIPE_CONFIG(ADDR_SURF_P2) | 2269 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2271 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2272 PIPE_CONFIG(ADDR_SURF_P2) | 2273 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2274 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2275 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2276 PIPE_CONFIG(ADDR_SURF_P2) | 2277 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2279 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2280 PIPE_CONFIG(ADDR_SURF_P2) | 2281 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2283 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2284 PIPE_CONFIG(ADDR_SURF_P2) | 2285 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2287 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2288 PIPE_CONFIG(ADDR_SURF_P2) | 2289 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2291 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2292 PIPE_CONFIG(ADDR_SURF_P2) | 2293 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2295 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2296 PIPE_CONFIG(ADDR_SURF_P2) | 2297 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2299 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2300 PIPE_CONFIG(ADDR_SURF_P2) | 2301 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2303 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2304 PIPE_CONFIG(ADDR_SURF_P2) | 2305 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2307 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2308 PIPE_CONFIG(ADDR_SURF_P2) | 2309 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2311 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2312 PIPE_CONFIG(ADDR_SURF_P2) | 2313 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2315 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2316 PIPE_CONFIG(ADDR_SURF_P2) | 2317 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2319 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2320 PIPE_CONFIG(ADDR_SURF_P2) | 2321 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2323 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2324 PIPE_CONFIG(ADDR_SURF_P2) | 2325 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2326 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2327 2328 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2329 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2330 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2331 NUM_BANKS(ADDR_SURF_8_BANK)); 2332 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2333 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2334 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2335 NUM_BANKS(ADDR_SURF_8_BANK)); 2336 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2337 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2338 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2339 NUM_BANKS(ADDR_SURF_8_BANK)); 2340 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2341 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2342 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2343 NUM_BANKS(ADDR_SURF_8_BANK)); 2344 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2345 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2346 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2347 NUM_BANKS(ADDR_SURF_8_BANK)); 2348 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2349 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2350 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2351 NUM_BANKS(ADDR_SURF_8_BANK)); 2352 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2353 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2354 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2355 NUM_BANKS(ADDR_SURF_8_BANK)); 2356 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2357 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2358 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2359 NUM_BANKS(ADDR_SURF_16_BANK)); 2360 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2361 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2362 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2363 NUM_BANKS(ADDR_SURF_16_BANK)); 2364 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2365 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2366 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2367 NUM_BANKS(ADDR_SURF_16_BANK)); 2368 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2371 NUM_BANKS(ADDR_SURF_16_BANK)); 2372 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2373 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2374 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2375 NUM_BANKS(ADDR_SURF_16_BANK)); 2376 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2377 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2378 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2379 NUM_BANKS(ADDR_SURF_16_BANK)); 2380 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2381 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2382 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2383 NUM_BANKS(ADDR_SURF_8_BANK)); 2384 2385 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2386 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2387 reg_offset != 23) 2388 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2389 2390 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2391 if (reg_offset != 7) 2392 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2393 2394 break; 2395 case CHIP_FIJI: 2396 case CHIP_VEGAM: 2397 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2398 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2399 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2400 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2401 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2402 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2403 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2404 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2405 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2406 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2407 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2408 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2409 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2410 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2411 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2412 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2413 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2414 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2415 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2416 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2417 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2418 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2419 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2420 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2421 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2422 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2423 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2424 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2425 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2426 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2427 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2428 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2429 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2430 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2431 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2432 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2433 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2435 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2436 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2437 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2439 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2440 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2441 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2443 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2444 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2446 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2447 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2448 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2449 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2450 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2451 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2452 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2453 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2455 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2456 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2457 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2459 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2460 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2461 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2462 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2463 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2464 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2465 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2467 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2468 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2469 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2470 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2471 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2472 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2473 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2474 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2475 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2476 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2477 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2478 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2479 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2480 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2481 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2483 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2484 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2485 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2487 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2488 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2489 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2491 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2492 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2493 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2494 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2495 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2496 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2497 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2498 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2499 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2500 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2501 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2502 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2503 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2504 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2505 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2506 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2507 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2508 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2509 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2510 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2511 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2512 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2513 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2514 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2515 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2516 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2517 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2518 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2519 2520 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2523 NUM_BANKS(ADDR_SURF_8_BANK)); 2524 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2525 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2526 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2527 NUM_BANKS(ADDR_SURF_8_BANK)); 2528 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2531 NUM_BANKS(ADDR_SURF_8_BANK)); 2532 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2535 NUM_BANKS(ADDR_SURF_8_BANK)); 2536 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2537 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2538 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2539 NUM_BANKS(ADDR_SURF_8_BANK)); 2540 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2541 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2542 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2543 NUM_BANKS(ADDR_SURF_8_BANK)); 2544 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2545 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2546 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2547 NUM_BANKS(ADDR_SURF_8_BANK)); 2548 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2549 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2550 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2551 NUM_BANKS(ADDR_SURF_8_BANK)); 2552 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2553 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2554 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2555 NUM_BANKS(ADDR_SURF_8_BANK)); 2556 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2557 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2558 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2559 NUM_BANKS(ADDR_SURF_8_BANK)); 2560 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2561 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2562 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2563 NUM_BANKS(ADDR_SURF_8_BANK)); 2564 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2565 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2566 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2567 NUM_BANKS(ADDR_SURF_8_BANK)); 2568 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2569 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2570 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2571 NUM_BANKS(ADDR_SURF_8_BANK)); 2572 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2573 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2574 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2575 NUM_BANKS(ADDR_SURF_4_BANK)); 2576 2577 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2578 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2579 2580 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2581 if (reg_offset != 7) 2582 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2583 2584 break; 2585 case CHIP_TONGA: 2586 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2587 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2588 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2589 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2590 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2591 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2592 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2593 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2594 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2595 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2596 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2597 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2598 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2599 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2600 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2601 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2602 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2603 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2604 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2605 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2606 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2607 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2608 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2609 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2610 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2611 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2612 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2613 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2614 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2615 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2616 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2617 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2618 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2619 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2620 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2621 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2622 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2623 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2624 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2625 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2626 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2627 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2628 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2629 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2630 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2631 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2632 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2633 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2634 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2635 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2636 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2637 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2638 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2639 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2640 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2641 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2642 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2643 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2644 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2645 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2646 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2647 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2648 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2649 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2650 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2651 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2652 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2653 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2654 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2655 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2656 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2657 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2658 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2659 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2660 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2661 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2662 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2663 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2664 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2665 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2666 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2667 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2668 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2669 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2670 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2671 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2672 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2673 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2674 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2675 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2676 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2677 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2678 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2679 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2680 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2681 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2682 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2683 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2684 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2685 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2686 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2687 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2688 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2689 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2690 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2691 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2692 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2693 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2694 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2695 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2696 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2697 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2698 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2699 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2700 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2701 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2702 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2703 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2704 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2705 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2706 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2707 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2708 2709 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2710 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2711 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2712 NUM_BANKS(ADDR_SURF_16_BANK)); 2713 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2714 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2715 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2716 NUM_BANKS(ADDR_SURF_16_BANK)); 2717 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2718 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2719 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2720 NUM_BANKS(ADDR_SURF_16_BANK)); 2721 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2722 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2723 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2724 NUM_BANKS(ADDR_SURF_16_BANK)); 2725 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2726 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2727 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2728 NUM_BANKS(ADDR_SURF_16_BANK)); 2729 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2730 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2731 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2732 NUM_BANKS(ADDR_SURF_16_BANK)); 2733 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2734 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2735 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2736 NUM_BANKS(ADDR_SURF_16_BANK)); 2737 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2738 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2739 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2740 NUM_BANKS(ADDR_SURF_16_BANK)); 2741 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2742 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2743 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2744 NUM_BANKS(ADDR_SURF_16_BANK)); 2745 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2746 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2747 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2748 NUM_BANKS(ADDR_SURF_16_BANK)); 2749 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2750 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2751 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2752 NUM_BANKS(ADDR_SURF_16_BANK)); 2753 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2754 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2755 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2756 NUM_BANKS(ADDR_SURF_8_BANK)); 2757 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2758 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2759 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2760 NUM_BANKS(ADDR_SURF_4_BANK)); 2761 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2762 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2763 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2764 NUM_BANKS(ADDR_SURF_4_BANK)); 2765 2766 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2767 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2768 2769 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2770 if (reg_offset != 7) 2771 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2772 2773 break; 2774 case CHIP_POLARIS11: 2775 case CHIP_POLARIS12: 2776 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2777 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2778 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2779 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2780 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2781 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2782 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2783 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2784 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2785 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2786 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2787 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2788 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2789 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2790 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2791 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2792 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2793 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2794 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2795 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2796 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2797 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2798 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2799 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2800 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2801 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2802 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2803 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2804 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2805 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2806 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2807 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2808 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2809 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2810 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2811 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2812 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2813 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2814 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2815 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2816 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2817 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2818 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2819 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2820 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2821 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2822 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2823 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2824 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2825 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2826 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2827 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2828 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2829 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2830 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2831 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2832 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2833 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2834 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2835 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2836 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2837 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2838 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2839 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2840 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2841 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2842 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2843 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2844 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2845 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2846 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2847 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2848 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2849 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2850 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2851 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2852 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2853 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2854 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2855 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2856 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2857 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2858 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2859 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2860 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2861 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2862 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2863 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2864 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2865 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2866 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2867 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2868 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2869 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2870 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2871 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2872 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2873 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2874 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2875 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2876 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2877 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2878 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2879 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2880 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2881 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2882 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2883 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2884 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2885 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2886 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2887 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2888 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2889 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2890 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2891 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2892 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2893 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2894 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2895 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2896 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2897 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2898 2899 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2900 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2901 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2902 NUM_BANKS(ADDR_SURF_16_BANK)); 2903 2904 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2905 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2906 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2907 NUM_BANKS(ADDR_SURF_16_BANK)); 2908 2909 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2910 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2911 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2912 NUM_BANKS(ADDR_SURF_16_BANK)); 2913 2914 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2915 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2916 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2917 NUM_BANKS(ADDR_SURF_16_BANK)); 2918 2919 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2920 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2921 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2922 NUM_BANKS(ADDR_SURF_16_BANK)); 2923 2924 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2925 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2926 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2927 NUM_BANKS(ADDR_SURF_16_BANK)); 2928 2929 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2930 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2931 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2932 NUM_BANKS(ADDR_SURF_16_BANK)); 2933 2934 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2935 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2936 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2937 NUM_BANKS(ADDR_SURF_16_BANK)); 2938 2939 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2940 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2941 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2942 NUM_BANKS(ADDR_SURF_16_BANK)); 2943 2944 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2945 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2946 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2947 NUM_BANKS(ADDR_SURF_16_BANK)); 2948 2949 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2950 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2951 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2952 NUM_BANKS(ADDR_SURF_16_BANK)); 2953 2954 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2955 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2956 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2957 NUM_BANKS(ADDR_SURF_16_BANK)); 2958 2959 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2960 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2961 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2962 NUM_BANKS(ADDR_SURF_8_BANK)); 2963 2964 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2965 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2966 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2967 NUM_BANKS(ADDR_SURF_4_BANK)); 2968 2969 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2970 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2971 2972 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2973 if (reg_offset != 7) 2974 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2975 2976 break; 2977 case CHIP_POLARIS10: 2978 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2979 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2980 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2981 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2982 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2983 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2984 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2985 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2986 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2987 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2988 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2989 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2990 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2991 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2992 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2993 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2994 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2995 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2996 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2997 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2998 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2999 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3000 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3001 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3002 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3003 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3004 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3005 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3006 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3007 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3008 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3009 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3010 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3011 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 3012 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3013 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3014 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3015 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3016 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3017 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3018 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3019 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3020 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3021 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3022 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3023 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3024 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3025 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3026 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3027 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3028 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3029 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3030 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3031 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3032 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3033 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3034 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3035 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3036 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3037 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3038 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3039 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3040 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3041 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3042 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3043 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3044 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3045 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3046 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3047 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3048 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3049 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3050 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3051 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3052 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3053 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3054 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3055 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3056 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3057 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3058 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3059 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3060 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3061 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3062 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3063 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3064 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3065 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3066 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3067 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3068 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3069 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3070 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3071 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3072 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3073 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3074 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3075 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3076 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3077 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3078 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3079 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3080 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3081 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3082 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3083 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3084 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3085 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3086 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3087 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3088 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3089 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3090 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3091 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3092 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3093 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3094 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3095 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3096 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3097 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3098 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3099 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3100 3101 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3102 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3103 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3104 NUM_BANKS(ADDR_SURF_16_BANK)); 3105 3106 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3107 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3108 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3109 NUM_BANKS(ADDR_SURF_16_BANK)); 3110 3111 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3112 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3113 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3114 NUM_BANKS(ADDR_SURF_16_BANK)); 3115 3116 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3117 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3118 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3119 NUM_BANKS(ADDR_SURF_16_BANK)); 3120 3121 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3122 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3123 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3124 NUM_BANKS(ADDR_SURF_16_BANK)); 3125 3126 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3127 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3128 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3129 NUM_BANKS(ADDR_SURF_16_BANK)); 3130 3131 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3132 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3133 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3134 NUM_BANKS(ADDR_SURF_16_BANK)); 3135 3136 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3137 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3138 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3139 NUM_BANKS(ADDR_SURF_16_BANK)); 3140 3141 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3142 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3143 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3144 NUM_BANKS(ADDR_SURF_16_BANK)); 3145 3146 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3147 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3148 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3149 NUM_BANKS(ADDR_SURF_16_BANK)); 3150 3151 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3152 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3153 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3154 NUM_BANKS(ADDR_SURF_16_BANK)); 3155 3156 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3157 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3158 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3159 NUM_BANKS(ADDR_SURF_8_BANK)); 3160 3161 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3162 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3163 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3164 NUM_BANKS(ADDR_SURF_4_BANK)); 3165 3166 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3167 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3168 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3169 NUM_BANKS(ADDR_SURF_4_BANK)); 3170 3171 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3172 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3173 3174 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3175 if (reg_offset != 7) 3176 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3177 3178 break; 3179 case CHIP_STONEY: 3180 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3181 PIPE_CONFIG(ADDR_SURF_P2) | 3182 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3183 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3184 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3185 PIPE_CONFIG(ADDR_SURF_P2) | 3186 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3187 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3188 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3189 PIPE_CONFIG(ADDR_SURF_P2) | 3190 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3191 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3192 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3193 PIPE_CONFIG(ADDR_SURF_P2) | 3194 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3195 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3196 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3197 PIPE_CONFIG(ADDR_SURF_P2) | 3198 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3199 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3200 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3201 PIPE_CONFIG(ADDR_SURF_P2) | 3202 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3203 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3204 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3205 PIPE_CONFIG(ADDR_SURF_P2) | 3206 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3207 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3208 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3209 PIPE_CONFIG(ADDR_SURF_P2)); 3210 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3211 PIPE_CONFIG(ADDR_SURF_P2) | 3212 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3213 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3214 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3215 PIPE_CONFIG(ADDR_SURF_P2) | 3216 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3217 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3218 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3219 PIPE_CONFIG(ADDR_SURF_P2) | 3220 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3221 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3222 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3223 PIPE_CONFIG(ADDR_SURF_P2) | 3224 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3225 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3226 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3227 PIPE_CONFIG(ADDR_SURF_P2) | 3228 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3229 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3230 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3231 PIPE_CONFIG(ADDR_SURF_P2) | 3232 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3233 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3234 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3235 PIPE_CONFIG(ADDR_SURF_P2) | 3236 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3237 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3238 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3239 PIPE_CONFIG(ADDR_SURF_P2) | 3240 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3241 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3242 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3243 PIPE_CONFIG(ADDR_SURF_P2) | 3244 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3246 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3247 PIPE_CONFIG(ADDR_SURF_P2) | 3248 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3249 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3250 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3251 PIPE_CONFIG(ADDR_SURF_P2) | 3252 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3254 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3255 PIPE_CONFIG(ADDR_SURF_P2) | 3256 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3257 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3258 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3259 PIPE_CONFIG(ADDR_SURF_P2) | 3260 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3262 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3263 PIPE_CONFIG(ADDR_SURF_P2) | 3264 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3266 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3267 PIPE_CONFIG(ADDR_SURF_P2) | 3268 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3270 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3271 PIPE_CONFIG(ADDR_SURF_P2) | 3272 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3274 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3275 PIPE_CONFIG(ADDR_SURF_P2) | 3276 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3278 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3279 PIPE_CONFIG(ADDR_SURF_P2) | 3280 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3282 3283 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3284 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3285 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3286 NUM_BANKS(ADDR_SURF_8_BANK)); 3287 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3288 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3289 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3290 NUM_BANKS(ADDR_SURF_8_BANK)); 3291 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3294 NUM_BANKS(ADDR_SURF_8_BANK)); 3295 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3296 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3297 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3298 NUM_BANKS(ADDR_SURF_8_BANK)); 3299 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3300 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3301 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3302 NUM_BANKS(ADDR_SURF_8_BANK)); 3303 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3306 NUM_BANKS(ADDR_SURF_8_BANK)); 3307 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3308 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3309 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3310 NUM_BANKS(ADDR_SURF_8_BANK)); 3311 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3312 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3313 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3314 NUM_BANKS(ADDR_SURF_16_BANK)); 3315 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3316 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3317 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3318 NUM_BANKS(ADDR_SURF_16_BANK)); 3319 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3320 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3321 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3322 NUM_BANKS(ADDR_SURF_16_BANK)); 3323 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3324 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3325 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3326 NUM_BANKS(ADDR_SURF_16_BANK)); 3327 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3328 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3329 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3330 NUM_BANKS(ADDR_SURF_16_BANK)); 3331 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3332 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3333 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3334 NUM_BANKS(ADDR_SURF_16_BANK)); 3335 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3336 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3337 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3338 NUM_BANKS(ADDR_SURF_8_BANK)); 3339 3340 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3341 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3342 reg_offset != 23) 3343 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3344 3345 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3346 if (reg_offset != 7) 3347 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3348 3349 break; 3350 default: 3351 dev_warn(adev->dev, 3352 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3353 adev->asic_type); 3354 3355 case CHIP_CARRIZO: 3356 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3357 PIPE_CONFIG(ADDR_SURF_P2) | 3358 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3359 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3360 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3361 PIPE_CONFIG(ADDR_SURF_P2) | 3362 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3363 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3364 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3365 PIPE_CONFIG(ADDR_SURF_P2) | 3366 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3367 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3368 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3369 PIPE_CONFIG(ADDR_SURF_P2) | 3370 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3371 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3372 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3373 PIPE_CONFIG(ADDR_SURF_P2) | 3374 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3375 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3376 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3377 PIPE_CONFIG(ADDR_SURF_P2) | 3378 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3379 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3380 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3381 PIPE_CONFIG(ADDR_SURF_P2) | 3382 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3383 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3384 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3385 PIPE_CONFIG(ADDR_SURF_P2)); 3386 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3387 PIPE_CONFIG(ADDR_SURF_P2) | 3388 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3389 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3390 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3391 PIPE_CONFIG(ADDR_SURF_P2) | 3392 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3393 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3394 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3395 PIPE_CONFIG(ADDR_SURF_P2) | 3396 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3397 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3398 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3399 PIPE_CONFIG(ADDR_SURF_P2) | 3400 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3401 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3402 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3403 PIPE_CONFIG(ADDR_SURF_P2) | 3404 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3405 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3406 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3407 PIPE_CONFIG(ADDR_SURF_P2) | 3408 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3409 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3410 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3411 PIPE_CONFIG(ADDR_SURF_P2) | 3412 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3413 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3414 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3415 PIPE_CONFIG(ADDR_SURF_P2) | 3416 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3417 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3418 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3419 PIPE_CONFIG(ADDR_SURF_P2) | 3420 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3421 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3422 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3423 PIPE_CONFIG(ADDR_SURF_P2) | 3424 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3425 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3426 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3427 PIPE_CONFIG(ADDR_SURF_P2) | 3428 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3429 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3430 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3431 PIPE_CONFIG(ADDR_SURF_P2) | 3432 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3433 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3434 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3435 PIPE_CONFIG(ADDR_SURF_P2) | 3436 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3438 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3439 PIPE_CONFIG(ADDR_SURF_P2) | 3440 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3442 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3443 PIPE_CONFIG(ADDR_SURF_P2) | 3444 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3446 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3447 PIPE_CONFIG(ADDR_SURF_P2) | 3448 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3449 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3450 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3451 PIPE_CONFIG(ADDR_SURF_P2) | 3452 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3453 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3454 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3455 PIPE_CONFIG(ADDR_SURF_P2) | 3456 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3457 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3458 3459 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3460 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3461 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3462 NUM_BANKS(ADDR_SURF_8_BANK)); 3463 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3466 NUM_BANKS(ADDR_SURF_8_BANK)); 3467 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3468 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3469 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3470 NUM_BANKS(ADDR_SURF_8_BANK)); 3471 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3472 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3473 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3474 NUM_BANKS(ADDR_SURF_8_BANK)); 3475 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3478 NUM_BANKS(ADDR_SURF_8_BANK)); 3479 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3480 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3481 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3482 NUM_BANKS(ADDR_SURF_8_BANK)); 3483 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3484 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3485 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3486 NUM_BANKS(ADDR_SURF_8_BANK)); 3487 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3488 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3489 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3490 NUM_BANKS(ADDR_SURF_16_BANK)); 3491 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3492 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3493 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3494 NUM_BANKS(ADDR_SURF_16_BANK)); 3495 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3496 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3497 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3498 NUM_BANKS(ADDR_SURF_16_BANK)); 3499 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3500 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3501 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3502 NUM_BANKS(ADDR_SURF_16_BANK)); 3503 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3504 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3505 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3506 NUM_BANKS(ADDR_SURF_16_BANK)); 3507 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3508 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3509 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3510 NUM_BANKS(ADDR_SURF_16_BANK)); 3511 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3514 NUM_BANKS(ADDR_SURF_8_BANK)); 3515 3516 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3517 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3518 reg_offset != 23) 3519 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3520 3521 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3522 if (reg_offset != 7) 3523 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3524 3525 break; 3526 } 3527 } 3528 3529 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, 3530 u32 se_num, u32 sh_num, u32 instance) 3531 { 3532 u32 data; 3533 3534 if (instance == 0xffffffff) 3535 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3536 else 3537 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 3538 3539 if (se_num == 0xffffffff) 3540 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3541 else 3542 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3543 3544 if (sh_num == 0xffffffff) 3545 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3546 else 3547 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3548 3549 WREG32(mmGRBM_GFX_INDEX, data); 3550 } 3551 3552 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev, 3553 u32 me, u32 pipe, u32 q) 3554 { 3555 vi_srbm_select(adev, me, pipe, q, 0); 3556 } 3557 3558 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3559 { 3560 u32 data, mask; 3561 3562 data = RREG32(mmCC_RB_BACKEND_DISABLE) | 3563 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3564 3565 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); 3566 3567 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 3568 adev->gfx.config.max_sh_per_se); 3569 3570 return (~data) & mask; 3571 } 3572 3573 static void 3574 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) 3575 { 3576 switch (adev->asic_type) { 3577 case CHIP_FIJI: 3578 case CHIP_VEGAM: 3579 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3580 RB_XSEL2(1) | PKR_MAP(2) | 3581 PKR_XSEL(1) | PKR_YSEL(1) | 3582 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); 3583 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | 3584 SE_PAIR_YSEL(2); 3585 break; 3586 case CHIP_TONGA: 3587 case CHIP_POLARIS10: 3588 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3589 SE_XSEL(1) | SE_YSEL(1); 3590 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | 3591 SE_PAIR_YSEL(2); 3592 break; 3593 case CHIP_TOPAZ: 3594 case CHIP_CARRIZO: 3595 *rconf |= RB_MAP_PKR0(2); 3596 *rconf1 |= 0x0; 3597 break; 3598 case CHIP_POLARIS11: 3599 case CHIP_POLARIS12: 3600 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3601 SE_XSEL(1) | SE_YSEL(1); 3602 *rconf1 |= 0x0; 3603 break; 3604 case CHIP_STONEY: 3605 *rconf |= 0x0; 3606 *rconf1 |= 0x0; 3607 break; 3608 default: 3609 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 3610 break; 3611 } 3612 } 3613 3614 static void 3615 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, 3616 u32 raster_config, u32 raster_config_1, 3617 unsigned rb_mask, unsigned num_rb) 3618 { 3619 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); 3620 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); 3621 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); 3622 unsigned rb_per_se = num_rb / num_se; 3623 unsigned se_mask[4]; 3624 unsigned se; 3625 3626 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3627 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3628 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 3629 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3630 3631 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); 3632 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); 3633 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); 3634 3635 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3636 (!se_mask[2] && !se_mask[3]))) { 3637 raster_config_1 &= ~SE_PAIR_MAP_MASK; 3638 3639 if (!se_mask[0] && !se_mask[1]) { 3640 raster_config_1 |= 3641 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); 3642 } else { 3643 raster_config_1 |= 3644 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); 3645 } 3646 } 3647 3648 for (se = 0; se < num_se; se++) { 3649 unsigned raster_config_se = raster_config; 3650 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3651 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3652 int idx = (se / 2) * 2; 3653 3654 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3655 raster_config_se &= ~SE_MAP_MASK; 3656 3657 if (!se_mask[idx]) { 3658 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); 3659 } else { 3660 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); 3661 } 3662 } 3663 3664 pkr0_mask &= rb_mask; 3665 pkr1_mask &= rb_mask; 3666 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3667 raster_config_se &= ~PKR_MAP_MASK; 3668 3669 if (!pkr0_mask) { 3670 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); 3671 } else { 3672 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); 3673 } 3674 } 3675 3676 if (rb_per_se >= 2) { 3677 unsigned rb0_mask = 1 << (se * rb_per_se); 3678 unsigned rb1_mask = rb0_mask << 1; 3679 3680 rb0_mask &= rb_mask; 3681 rb1_mask &= rb_mask; 3682 if (!rb0_mask || !rb1_mask) { 3683 raster_config_se &= ~RB_MAP_PKR0_MASK; 3684 3685 if (!rb0_mask) { 3686 raster_config_se |= 3687 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); 3688 } else { 3689 raster_config_se |= 3690 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); 3691 } 3692 } 3693 3694 if (rb_per_se > 2) { 3695 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3696 rb1_mask = rb0_mask << 1; 3697 rb0_mask &= rb_mask; 3698 rb1_mask &= rb_mask; 3699 if (!rb0_mask || !rb1_mask) { 3700 raster_config_se &= ~RB_MAP_PKR1_MASK; 3701 3702 if (!rb0_mask) { 3703 raster_config_se |= 3704 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); 3705 } else { 3706 raster_config_se |= 3707 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0); 3708 } 3709 } 3710 } 3711 } 3712 3713 /* GRBM_GFX_INDEX has a different offset on VI */ 3714 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); 3715 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); 3716 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3717 } 3718 3719 /* GRBM_GFX_INDEX has a different offset on VI */ 3720 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3721 } 3722 3723 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) 3724 { 3725 int i, j; 3726 u32 data; 3727 u32 raster_config = 0, raster_config_1 = 0; 3728 u32 active_rbs = 0; 3729 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 3730 adev->gfx.config.max_sh_per_se; 3731 unsigned num_rb_pipes; 3732 3733 mutex_lock(&adev->grbm_idx_mutex); 3734 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3735 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3736 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3737 data = gfx_v8_0_get_rb_active_bitmap(adev); 3738 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 3739 rb_bitmap_width_per_sh); 3740 } 3741 } 3742 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3743 3744 adev->gfx.config.backend_enable_mask = active_rbs; 3745 adev->gfx.config.num_rbs = hweight32(active_rbs); 3746 3747 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se * 3748 adev->gfx.config.max_shader_engines, 16); 3749 3750 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1); 3751 3752 if (!adev->gfx.config.backend_enable_mask || 3753 adev->gfx.config.num_rbs >= num_rb_pipes) { 3754 WREG32(mmPA_SC_RASTER_CONFIG, raster_config); 3755 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3756 } else { 3757 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1, 3758 adev->gfx.config.backend_enable_mask, 3759 num_rb_pipes); 3760 } 3761 3762 /* cache the values for userspace */ 3763 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3764 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3765 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3766 adev->gfx.config.rb_config[i][j].rb_backend_disable = 3767 RREG32(mmCC_RB_BACKEND_DISABLE); 3768 adev->gfx.config.rb_config[i][j].user_rb_backend_disable = 3769 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3770 adev->gfx.config.rb_config[i][j].raster_config = 3771 RREG32(mmPA_SC_RASTER_CONFIG); 3772 adev->gfx.config.rb_config[i][j].raster_config_1 = 3773 RREG32(mmPA_SC_RASTER_CONFIG_1); 3774 } 3775 } 3776 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3777 mutex_unlock(&adev->grbm_idx_mutex); 3778 } 3779 3780 /** 3781 * gfx_v8_0_init_compute_vmid - gart enable 3782 * 3783 * @adev: amdgpu_device pointer 3784 * 3785 * Initialize compute vmid sh_mem registers 3786 * 3787 */ 3788 #define DEFAULT_SH_MEM_BASES (0x6000) 3789 #define FIRST_COMPUTE_VMID (8) 3790 #define LAST_COMPUTE_VMID (16) 3791 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) 3792 { 3793 int i; 3794 uint32_t sh_mem_config; 3795 uint32_t sh_mem_bases; 3796 3797 /* 3798 * Configure apertures: 3799 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 3800 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 3801 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 3802 */ 3803 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 3804 3805 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 << 3806 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT | 3807 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 3808 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | 3809 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | 3810 SH_MEM_CONFIG__PRIVATE_ATC_MASK; 3811 3812 mutex_lock(&adev->srbm_mutex); 3813 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 3814 vi_srbm_select(adev, 0, 0, 0, i); 3815 /* CP and shaders */ 3816 WREG32(mmSH_MEM_CONFIG, sh_mem_config); 3817 WREG32(mmSH_MEM_APE1_BASE, 1); 3818 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3819 WREG32(mmSH_MEM_BASES, sh_mem_bases); 3820 } 3821 vi_srbm_select(adev, 0, 0, 0, 0); 3822 mutex_unlock(&adev->srbm_mutex); 3823 } 3824 3825 static void gfx_v8_0_config_init(struct amdgpu_device *adev) 3826 { 3827 switch (adev->asic_type) { 3828 default: 3829 adev->gfx.config.double_offchip_lds_buf = 1; 3830 break; 3831 case CHIP_CARRIZO: 3832 case CHIP_STONEY: 3833 adev->gfx.config.double_offchip_lds_buf = 0; 3834 break; 3835 } 3836 } 3837 3838 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) 3839 { 3840 u32 tmp, sh_static_mem_cfg; 3841 int i; 3842 3843 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF); 3844 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3845 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3846 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); 3847 3848 gfx_v8_0_tiling_mode_table_init(adev); 3849 gfx_v8_0_setup_rb(adev); 3850 gfx_v8_0_get_cu_info(adev); 3851 gfx_v8_0_config_init(adev); 3852 3853 /* XXX SH_MEM regs */ 3854 /* where to put LDS, scratch, GPUVM in FSA64 space */ 3855 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG, 3856 SWIZZLE_ENABLE, 1); 3857 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3858 ELEMENT_SIZE, 1); 3859 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3860 INDEX_STRIDE, 3); 3861 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); 3862 3863 mutex_lock(&adev->srbm_mutex); 3864 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { 3865 vi_srbm_select(adev, 0, 0, 0, i); 3866 /* CP and shaders */ 3867 if (i == 0) { 3868 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC); 3869 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3870 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3871 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3872 WREG32(mmSH_MEM_CONFIG, tmp); 3873 WREG32(mmSH_MEM_BASES, 0); 3874 } else { 3875 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC); 3876 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3877 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3878 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3879 WREG32(mmSH_MEM_CONFIG, tmp); 3880 tmp = adev->gmc.shared_aperture_start >> 48; 3881 WREG32(mmSH_MEM_BASES, tmp); 3882 } 3883 3884 WREG32(mmSH_MEM_APE1_BASE, 1); 3885 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3886 } 3887 vi_srbm_select(adev, 0, 0, 0, 0); 3888 mutex_unlock(&adev->srbm_mutex); 3889 3890 gfx_v8_0_init_compute_vmid(adev); 3891 3892 mutex_lock(&adev->grbm_idx_mutex); 3893 /* 3894 * making sure that the following register writes will be broadcasted 3895 * to all the shaders 3896 */ 3897 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3898 3899 WREG32(mmPA_SC_FIFO_SIZE, 3900 (adev->gfx.config.sc_prim_fifo_size_frontend << 3901 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | 3902 (adev->gfx.config.sc_prim_fifo_size_backend << 3903 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | 3904 (adev->gfx.config.sc_hiz_tile_fifo_size << 3905 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | 3906 (adev->gfx.config.sc_earlyz_tile_fifo_size << 3907 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); 3908 3909 tmp = RREG32(mmSPI_ARB_PRIORITY); 3910 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); 3911 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); 3912 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); 3913 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2); 3914 WREG32(mmSPI_ARB_PRIORITY, tmp); 3915 3916 mutex_unlock(&adev->grbm_idx_mutex); 3917 3918 } 3919 3920 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 3921 { 3922 u32 i, j, k; 3923 u32 mask; 3924 3925 mutex_lock(&adev->grbm_idx_mutex); 3926 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3927 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3928 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3929 for (k = 0; k < adev->usec_timeout; k++) { 3930 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) 3931 break; 3932 udelay(1); 3933 } 3934 if (k == adev->usec_timeout) { 3935 gfx_v8_0_select_se_sh(adev, 0xffffffff, 3936 0xffffffff, 0xffffffff); 3937 mutex_unlock(&adev->grbm_idx_mutex); 3938 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 3939 i, j); 3940 return; 3941 } 3942 } 3943 } 3944 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3945 mutex_unlock(&adev->grbm_idx_mutex); 3946 3947 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 3948 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 3949 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 3950 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 3951 for (k = 0; k < adev->usec_timeout; k++) { 3952 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 3953 break; 3954 udelay(1); 3955 } 3956 } 3957 3958 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 3959 bool enable) 3960 { 3961 u32 tmp = RREG32(mmCP_INT_CNTL_RING0); 3962 3963 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 3964 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 3965 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 3966 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 3967 3968 WREG32(mmCP_INT_CNTL_RING0, tmp); 3969 } 3970 3971 static void gfx_v8_0_init_csb(struct amdgpu_device *adev) 3972 { 3973 /* csib */ 3974 WREG32(mmRLC_CSIB_ADDR_HI, 3975 adev->gfx.rlc.clear_state_gpu_addr >> 32); 3976 WREG32(mmRLC_CSIB_ADDR_LO, 3977 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 3978 WREG32(mmRLC_CSIB_LENGTH, 3979 adev->gfx.rlc.clear_state_size); 3980 } 3981 3982 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format, 3983 int ind_offset, 3984 int list_size, 3985 int *unique_indices, 3986 int *indices_count, 3987 int max_indices, 3988 int *ind_start_offsets, 3989 int *offset_count, 3990 int max_offset) 3991 { 3992 int indices; 3993 bool new_entry = true; 3994 3995 for (; ind_offset < list_size; ind_offset++) { 3996 3997 if (new_entry) { 3998 new_entry = false; 3999 ind_start_offsets[*offset_count] = ind_offset; 4000 *offset_count = *offset_count + 1; 4001 BUG_ON(*offset_count >= max_offset); 4002 } 4003 4004 if (register_list_format[ind_offset] == 0xFFFFFFFF) { 4005 new_entry = true; 4006 continue; 4007 } 4008 4009 ind_offset += 2; 4010 4011 /* look for the matching indice */ 4012 for (indices = 0; 4013 indices < *indices_count; 4014 indices++) { 4015 if (unique_indices[indices] == 4016 register_list_format[ind_offset]) 4017 break; 4018 } 4019 4020 if (indices >= *indices_count) { 4021 unique_indices[*indices_count] = 4022 register_list_format[ind_offset]; 4023 indices = *indices_count; 4024 *indices_count = *indices_count + 1; 4025 BUG_ON(*indices_count >= max_indices); 4026 } 4027 4028 register_list_format[ind_offset] = indices; 4029 } 4030 } 4031 4032 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) 4033 { 4034 int i, temp, data; 4035 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0}; 4036 int indices_count = 0; 4037 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 4038 int offset_count = 0; 4039 4040 int list_size; 4041 unsigned int *register_list_format = 4042 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 4043 if (!register_list_format) 4044 return -ENOMEM; 4045 memcpy(register_list_format, adev->gfx.rlc.register_list_format, 4046 adev->gfx.rlc.reg_list_format_size_bytes); 4047 4048 gfx_v8_0_parse_ind_reg_list(register_list_format, 4049 RLC_FormatDirectRegListLength, 4050 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 4051 unique_indices, 4052 &indices_count, 4053 ARRAY_SIZE(unique_indices), 4054 indirect_start_offsets, 4055 &offset_count, 4056 ARRAY_SIZE(indirect_start_offsets)); 4057 4058 /* save and restore list */ 4059 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1); 4060 4061 WREG32(mmRLC_SRM_ARAM_ADDR, 0); 4062 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 4063 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]); 4064 4065 /* indirect list */ 4066 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start); 4067 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) 4068 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]); 4069 4070 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 4071 list_size = list_size >> 1; 4072 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size); 4073 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size); 4074 4075 /* starting offsets starts */ 4076 WREG32(mmRLC_GPM_SCRATCH_ADDR, 4077 adev->gfx.rlc.starting_offsets_start); 4078 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 4079 WREG32(mmRLC_GPM_SCRATCH_DATA, 4080 indirect_start_offsets[i]); 4081 4082 /* unique indices */ 4083 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; 4084 data = mmRLC_SRM_INDEX_CNTL_DATA_0; 4085 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) { 4086 if (unique_indices[i] != 0) { 4087 WREG32(temp + i, unique_indices[i] & 0x3FFFF); 4088 WREG32(data + i, unique_indices[i] >> 20); 4089 } 4090 } 4091 kfree(register_list_format); 4092 4093 return 0; 4094 } 4095 4096 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev) 4097 { 4098 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1); 4099 } 4100 4101 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev) 4102 { 4103 uint32_t data; 4104 4105 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60); 4106 4107 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10); 4108 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10); 4109 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10); 4110 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10); 4111 WREG32(mmRLC_PG_DELAY, data); 4112 4113 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3); 4114 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0); 4115 4116 } 4117 4118 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 4119 bool enable) 4120 { 4121 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0); 4122 } 4123 4124 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 4125 bool enable) 4126 { 4127 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0); 4128 } 4129 4130 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable) 4131 { 4132 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1); 4133 } 4134 4135 static void gfx_v8_0_init_pg(struct amdgpu_device *adev) 4136 { 4137 if ((adev->asic_type == CHIP_CARRIZO) || 4138 (adev->asic_type == CHIP_STONEY)) { 4139 gfx_v8_0_init_csb(adev); 4140 gfx_v8_0_init_save_restore_list(adev); 4141 gfx_v8_0_enable_save_restore_machine(adev); 4142 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8); 4143 gfx_v8_0_init_power_gating(adev); 4144 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); 4145 } else if ((adev->asic_type == CHIP_POLARIS11) || 4146 (adev->asic_type == CHIP_POLARIS12) || 4147 (adev->asic_type == CHIP_VEGAM)) { 4148 gfx_v8_0_init_csb(adev); 4149 gfx_v8_0_init_save_restore_list(adev); 4150 gfx_v8_0_enable_save_restore_machine(adev); 4151 gfx_v8_0_init_power_gating(adev); 4152 } 4153 4154 } 4155 4156 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev) 4157 { 4158 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0); 4159 4160 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4161 gfx_v8_0_wait_for_rlc_serdes(adev); 4162 } 4163 4164 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev) 4165 { 4166 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4167 udelay(50); 4168 4169 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 4170 udelay(50); 4171 } 4172 4173 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev) 4174 { 4175 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1); 4176 4177 /* carrizo do enable cp interrupt after cp inited */ 4178 if (!(adev->flags & AMD_IS_APU)) 4179 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 4180 4181 udelay(50); 4182 } 4183 4184 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev) 4185 { 4186 const struct rlc_firmware_header_v2_0 *hdr; 4187 const __le32 *fw_data; 4188 unsigned i, fw_size; 4189 4190 if (!adev->gfx.rlc_fw) 4191 return -EINVAL; 4192 4193 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 4194 amdgpu_ucode_print_rlc_hdr(&hdr->header); 4195 4196 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 4197 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 4198 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 4199 4200 WREG32(mmRLC_GPM_UCODE_ADDR, 0); 4201 for (i = 0; i < fw_size; i++) 4202 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 4203 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 4204 4205 return 0; 4206 } 4207 4208 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) 4209 { 4210 int r; 4211 u32 tmp; 4212 4213 gfx_v8_0_rlc_stop(adev); 4214 4215 /* disable CG */ 4216 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL); 4217 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 4218 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4219 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); 4220 if (adev->asic_type == CHIP_POLARIS11 || 4221 adev->asic_type == CHIP_POLARIS10 || 4222 adev->asic_type == CHIP_POLARIS12 || 4223 adev->asic_type == CHIP_VEGAM) { 4224 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D); 4225 tmp &= ~0x3; 4226 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp); 4227 } 4228 4229 /* disable PG */ 4230 WREG32(mmRLC_PG_CNTL, 0); 4231 4232 gfx_v8_0_rlc_reset(adev); 4233 gfx_v8_0_init_pg(adev); 4234 4235 4236 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4237 /* legacy rlc firmware loading */ 4238 r = gfx_v8_0_rlc_load_microcode(adev); 4239 if (r) 4240 return r; 4241 } 4242 4243 gfx_v8_0_rlc_start(adev); 4244 4245 return 0; 4246 } 4247 4248 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 4249 { 4250 int i; 4251 u32 tmp = RREG32(mmCP_ME_CNTL); 4252 4253 if (enable) { 4254 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0); 4255 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0); 4256 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0); 4257 } else { 4258 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); 4259 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); 4260 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); 4261 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 4262 adev->gfx.gfx_ring[i].ready = false; 4263 } 4264 WREG32(mmCP_ME_CNTL, tmp); 4265 udelay(50); 4266 } 4267 4268 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 4269 { 4270 const struct gfx_firmware_header_v1_0 *pfp_hdr; 4271 const struct gfx_firmware_header_v1_0 *ce_hdr; 4272 const struct gfx_firmware_header_v1_0 *me_hdr; 4273 const __le32 *fw_data; 4274 unsigned i, fw_size; 4275 4276 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 4277 return -EINVAL; 4278 4279 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 4280 adev->gfx.pfp_fw->data; 4281 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 4282 adev->gfx.ce_fw->data; 4283 me_hdr = (const struct gfx_firmware_header_v1_0 *) 4284 adev->gfx.me_fw->data; 4285 4286 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 4287 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 4288 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 4289 4290 gfx_v8_0_cp_gfx_enable(adev, false); 4291 4292 /* PFP */ 4293 fw_data = (const __le32 *) 4294 (adev->gfx.pfp_fw->data + 4295 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 4296 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 4297 WREG32(mmCP_PFP_UCODE_ADDR, 0); 4298 for (i = 0; i < fw_size; i++) 4299 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 4300 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 4301 4302 /* CE */ 4303 fw_data = (const __le32 *) 4304 (adev->gfx.ce_fw->data + 4305 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 4306 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 4307 WREG32(mmCP_CE_UCODE_ADDR, 0); 4308 for (i = 0; i < fw_size; i++) 4309 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 4310 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 4311 4312 /* ME */ 4313 fw_data = (const __le32 *) 4314 (adev->gfx.me_fw->data + 4315 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 4316 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 4317 WREG32(mmCP_ME_RAM_WADDR, 0); 4318 for (i = 0; i < fw_size; i++) 4319 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 4320 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 4321 4322 return 0; 4323 } 4324 4325 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev) 4326 { 4327 u32 count = 0; 4328 const struct cs_section_def *sect = NULL; 4329 const struct cs_extent_def *ext = NULL; 4330 4331 /* begin clear state */ 4332 count += 2; 4333 /* context control state */ 4334 count += 3; 4335 4336 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4337 for (ext = sect->section; ext->extent != NULL; ++ext) { 4338 if (sect->id == SECT_CONTEXT) 4339 count += 2 + ext->reg_count; 4340 else 4341 return 0; 4342 } 4343 } 4344 /* pa_sc_raster_config/pa_sc_raster_config1 */ 4345 count += 4; 4346 /* end clear state */ 4347 count += 2; 4348 /* clear state */ 4349 count += 2; 4350 4351 return count; 4352 } 4353 4354 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) 4355 { 4356 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 4357 const struct cs_section_def *sect = NULL; 4358 const struct cs_extent_def *ext = NULL; 4359 int r, i; 4360 4361 /* init the CP */ 4362 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 4363 WREG32(mmCP_ENDIAN_SWAP, 0); 4364 WREG32(mmCP_DEVICE_ID, 1); 4365 4366 gfx_v8_0_cp_gfx_enable(adev, true); 4367 4368 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); 4369 if (r) { 4370 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 4371 return r; 4372 } 4373 4374 /* clear state buffer */ 4375 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4376 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 4377 4378 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4379 amdgpu_ring_write(ring, 0x80000000); 4380 amdgpu_ring_write(ring, 0x80000000); 4381 4382 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4383 for (ext = sect->section; ext->extent != NULL; ++ext) { 4384 if (sect->id == SECT_CONTEXT) { 4385 amdgpu_ring_write(ring, 4386 PACKET3(PACKET3_SET_CONTEXT_REG, 4387 ext->reg_count)); 4388 amdgpu_ring_write(ring, 4389 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 4390 for (i = 0; i < ext->reg_count; i++) 4391 amdgpu_ring_write(ring, ext->extent[i]); 4392 } 4393 } 4394 } 4395 4396 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 4397 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 4398 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config); 4399 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1); 4400 4401 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4402 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 4403 4404 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 4405 amdgpu_ring_write(ring, 0); 4406 4407 /* init the CE partitions */ 4408 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 4409 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 4410 amdgpu_ring_write(ring, 0x8000); 4411 amdgpu_ring_write(ring, 0x8000); 4412 4413 amdgpu_ring_commit(ring); 4414 4415 return 0; 4416 } 4417 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring) 4418 { 4419 u32 tmp; 4420 /* no gfx doorbells on iceland */ 4421 if (adev->asic_type == CHIP_TOPAZ) 4422 return; 4423 4424 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); 4425 4426 if (ring->use_doorbell) { 4427 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4428 DOORBELL_OFFSET, ring->doorbell_index); 4429 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4430 DOORBELL_HIT, 0); 4431 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4432 DOORBELL_EN, 1); 4433 } else { 4434 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 4435 } 4436 4437 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); 4438 4439 if (adev->flags & AMD_IS_APU) 4440 return; 4441 4442 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 4443 DOORBELL_RANGE_LOWER, 4444 AMDGPU_DOORBELL_GFX_RING0); 4445 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 4446 4447 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, 4448 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 4449 } 4450 4451 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) 4452 { 4453 struct amdgpu_ring *ring; 4454 u32 tmp; 4455 u32 rb_bufsz; 4456 u64 rb_addr, rptr_addr, wptr_gpu_addr; 4457 int r; 4458 4459 /* Set the write pointer delay */ 4460 WREG32(mmCP_RB_WPTR_DELAY, 0); 4461 4462 /* set the RB to use vmid 0 */ 4463 WREG32(mmCP_RB_VMID, 0); 4464 4465 /* Set ring buffer size */ 4466 ring = &adev->gfx.gfx_ring[0]; 4467 rb_bufsz = order_base_2(ring->ring_size / 8); 4468 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 4469 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 4470 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3); 4471 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1); 4472 #ifdef __BIG_ENDIAN 4473 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 4474 #endif 4475 WREG32(mmCP_RB0_CNTL, tmp); 4476 4477 /* Initialize the ring buffer's read and write pointers */ 4478 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); 4479 ring->wptr = 0; 4480 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4481 4482 /* set the wb address wether it's enabled or not */ 4483 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4484 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 4485 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); 4486 4487 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4488 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 4489 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 4490 mdelay(1); 4491 WREG32(mmCP_RB0_CNTL, tmp); 4492 4493 rb_addr = ring->gpu_addr >> 8; 4494 WREG32(mmCP_RB0_BASE, rb_addr); 4495 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 4496 4497 gfx_v8_0_set_cpg_door_bell(adev, ring); 4498 /* start the ring */ 4499 amdgpu_ring_clear_ring(ring); 4500 gfx_v8_0_cp_gfx_start(adev); 4501 ring->ready = true; 4502 r = amdgpu_ring_test_ring(ring); 4503 if (r) 4504 ring->ready = false; 4505 4506 return r; 4507 } 4508 4509 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 4510 { 4511 int i; 4512 4513 if (enable) { 4514 WREG32(mmCP_MEC_CNTL, 0); 4515 } else { 4516 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 4517 for (i = 0; i < adev->gfx.num_compute_rings; i++) 4518 adev->gfx.compute_ring[i].ready = false; 4519 adev->gfx.kiq.ring.ready = false; 4520 } 4521 udelay(50); 4522 } 4523 4524 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 4525 { 4526 const struct gfx_firmware_header_v1_0 *mec_hdr; 4527 const __le32 *fw_data; 4528 unsigned i, fw_size; 4529 4530 if (!adev->gfx.mec_fw) 4531 return -EINVAL; 4532 4533 gfx_v8_0_cp_compute_enable(adev, false); 4534 4535 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4536 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4537 4538 fw_data = (const __le32 *) 4539 (adev->gfx.mec_fw->data + 4540 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4541 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4542 4543 /* MEC1 */ 4544 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4545 for (i = 0; i < fw_size; i++) 4546 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4547 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4548 4549 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 4550 if (adev->gfx.mec2_fw) { 4551 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4552 4553 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4554 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4555 4556 fw_data = (const __le32 *) 4557 (adev->gfx.mec2_fw->data + 4558 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4559 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4560 4561 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4562 for (i = 0; i < fw_size; i++) 4563 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4564 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4565 } 4566 4567 return 0; 4568 } 4569 4570 /* KIQ functions */ 4571 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) 4572 { 4573 uint32_t tmp; 4574 struct amdgpu_device *adev = ring->adev; 4575 4576 /* tell RLC which is KIQ queue */ 4577 tmp = RREG32(mmRLC_CP_SCHEDULERS); 4578 tmp &= 0xffffff00; 4579 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4580 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4581 tmp |= 0x80; 4582 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4583 } 4584 4585 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) 4586 { 4587 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 4588 uint64_t queue_mask = 0; 4589 int r, i; 4590 4591 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 4592 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 4593 continue; 4594 4595 /* This situation may be hit in the future if a new HW 4596 * generation exposes more than 64 queues. If so, the 4597 * definition of queue_mask needs updating */ 4598 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 4599 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 4600 break; 4601 } 4602 4603 queue_mask |= (1ull << i); 4604 } 4605 4606 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8); 4607 if (r) { 4608 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 4609 return r; 4610 } 4611 /* set resources */ 4612 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 4613 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ 4614 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 4615 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 4616 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 4617 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 4618 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 4619 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 4620 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4621 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4622 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 4623 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4624 4625 /* map queues */ 4626 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 4627 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 4628 amdgpu_ring_write(kiq_ring, 4629 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); 4630 amdgpu_ring_write(kiq_ring, 4631 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | 4632 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 4633 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 4634 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */ 4635 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 4636 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 4637 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 4638 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4639 } 4640 4641 r = amdgpu_ring_test_ring(kiq_ring); 4642 if (r) { 4643 DRM_ERROR("KCQ enable failed\n"); 4644 kiq_ring->ready = false; 4645 } 4646 return r; 4647 } 4648 4649 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) 4650 { 4651 int i, r = 0; 4652 4653 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 4654 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); 4655 for (i = 0; i < adev->usec_timeout; i++) { 4656 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 4657 break; 4658 udelay(1); 4659 } 4660 if (i == adev->usec_timeout) 4661 r = -ETIMEDOUT; 4662 } 4663 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 4664 WREG32(mmCP_HQD_PQ_RPTR, 0); 4665 WREG32(mmCP_HQD_PQ_WPTR, 0); 4666 4667 return r; 4668 } 4669 4670 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) 4671 { 4672 struct amdgpu_device *adev = ring->adev; 4673 struct vi_mqd *mqd = ring->mqd_ptr; 4674 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4675 uint32_t tmp; 4676 4677 mqd->header = 0xC0310800; 4678 mqd->compute_pipelinestat_enable = 0x00000001; 4679 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4680 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4681 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4682 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4683 mqd->compute_misc_reserved = 0x00000003; 4684 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr 4685 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4686 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr 4687 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4688 eop_base_addr = ring->eop_gpu_addr >> 8; 4689 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4690 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4691 4692 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4693 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4694 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4695 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); 4696 4697 mqd->cp_hqd_eop_control = tmp; 4698 4699 /* enable doorbell? */ 4700 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), 4701 CP_HQD_PQ_DOORBELL_CONTROL, 4702 DOORBELL_EN, 4703 ring->use_doorbell ? 1 : 0); 4704 4705 mqd->cp_hqd_pq_doorbell_control = tmp; 4706 4707 /* set the pointer to the MQD */ 4708 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 4709 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 4710 4711 /* set MQD vmid to 0 */ 4712 tmp = RREG32(mmCP_MQD_CONTROL); 4713 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4714 mqd->cp_mqd_control = tmp; 4715 4716 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4717 hqd_gpu_addr = ring->gpu_addr >> 8; 4718 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4719 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4720 4721 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4722 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4723 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4724 (order_base_2(ring->ring_size / 4) - 1)); 4725 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4726 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4727 #ifdef __BIG_ENDIAN 4728 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4729 #endif 4730 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4731 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4732 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4733 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4734 mqd->cp_hqd_pq_control = tmp; 4735 4736 /* set the wb address whether it's enabled or not */ 4737 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4738 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4739 mqd->cp_hqd_pq_rptr_report_addr_hi = 4740 upper_32_bits(wb_gpu_addr) & 0xffff; 4741 4742 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4743 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4744 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4745 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4746 4747 tmp = 0; 4748 /* enable the doorbell if requested */ 4749 if (ring->use_doorbell) { 4750 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4751 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4752 DOORBELL_OFFSET, ring->doorbell_index); 4753 4754 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4755 DOORBELL_EN, 1); 4756 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4757 DOORBELL_SOURCE, 0); 4758 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4759 DOORBELL_HIT, 0); 4760 } 4761 4762 mqd->cp_hqd_pq_doorbell_control = tmp; 4763 4764 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4765 ring->wptr = 0; 4766 mqd->cp_hqd_pq_wptr = ring->wptr; 4767 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4768 4769 /* set the vmid for the queue */ 4770 mqd->cp_hqd_vmid = 0; 4771 4772 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4773 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4774 mqd->cp_hqd_persistent_state = tmp; 4775 4776 /* set MTYPE */ 4777 tmp = RREG32(mmCP_HQD_IB_CONTROL); 4778 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4779 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); 4780 mqd->cp_hqd_ib_control = tmp; 4781 4782 tmp = RREG32(mmCP_HQD_IQ_TIMER); 4783 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); 4784 mqd->cp_hqd_iq_timer = tmp; 4785 4786 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); 4787 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); 4788 mqd->cp_hqd_ctx_save_control = tmp; 4789 4790 /* defaults */ 4791 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); 4792 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR); 4793 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); 4794 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); 4795 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); 4796 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); 4797 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); 4798 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); 4799 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); 4800 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); 4801 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); 4802 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); 4803 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); 4804 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); 4805 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); 4806 4807 /* activate the queue */ 4808 mqd->cp_hqd_active = 1; 4809 4810 return 0; 4811 } 4812 4813 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 4814 struct vi_mqd *mqd) 4815 { 4816 uint32_t mqd_reg; 4817 uint32_t *mqd_data; 4818 4819 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ 4820 mqd_data = &mqd->cp_mqd_base_addr_lo; 4821 4822 /* disable wptr polling */ 4823 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); 4824 4825 /* program all HQD registers */ 4826 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++) 4827 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4828 4829 /* Tonga errata: EOP RPTR/WPTR should be left unmodified. 4830 * This is safe since EOP RPTR==WPTR for any inactive HQD 4831 * on ASICs that do not support context-save. 4832 * EOP writes/reads can start anywhere in the ring. 4833 */ 4834 if (adev->asic_type != CHIP_TONGA) { 4835 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); 4836 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); 4837 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); 4838 } 4839 4840 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) 4841 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4842 4843 /* activate the HQD */ 4844 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) 4845 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4846 4847 return 0; 4848 } 4849 4850 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) 4851 { 4852 struct amdgpu_device *adev = ring->adev; 4853 struct vi_mqd *mqd = ring->mqd_ptr; 4854 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 4855 4856 gfx_v8_0_kiq_setting(ring); 4857 4858 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4859 /* reset MQD to a clean status */ 4860 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4861 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4862 4863 /* reset ring buffer */ 4864 ring->wptr = 0; 4865 amdgpu_ring_clear_ring(ring); 4866 mutex_lock(&adev->srbm_mutex); 4867 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4868 gfx_v8_0_mqd_commit(adev, mqd); 4869 vi_srbm_select(adev, 0, 0, 0, 0); 4870 mutex_unlock(&adev->srbm_mutex); 4871 } else { 4872 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4873 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4874 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4875 mutex_lock(&adev->srbm_mutex); 4876 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4877 gfx_v8_0_mqd_init(ring); 4878 gfx_v8_0_mqd_commit(adev, mqd); 4879 vi_srbm_select(adev, 0, 0, 0, 0); 4880 mutex_unlock(&adev->srbm_mutex); 4881 4882 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4883 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4884 } 4885 4886 return 0; 4887 } 4888 4889 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) 4890 { 4891 struct amdgpu_device *adev = ring->adev; 4892 struct vi_mqd *mqd = ring->mqd_ptr; 4893 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4894 4895 if (!adev->in_gpu_reset && !adev->gfx.in_suspend) { 4896 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4897 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4898 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4899 mutex_lock(&adev->srbm_mutex); 4900 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4901 gfx_v8_0_mqd_init(ring); 4902 vi_srbm_select(adev, 0, 0, 0, 0); 4903 mutex_unlock(&adev->srbm_mutex); 4904 4905 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4906 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4907 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4908 /* reset MQD to a clean status */ 4909 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4910 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4911 /* reset ring buffer */ 4912 ring->wptr = 0; 4913 amdgpu_ring_clear_ring(ring); 4914 } else { 4915 amdgpu_ring_clear_ring(ring); 4916 } 4917 return 0; 4918 } 4919 4920 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) 4921 { 4922 if (adev->asic_type > CHIP_TONGA) { 4923 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); 4924 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); 4925 } 4926 /* enable doorbells */ 4927 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4928 } 4929 4930 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) 4931 { 4932 struct amdgpu_ring *ring; 4933 int r; 4934 4935 ring = &adev->gfx.kiq.ring; 4936 4937 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4938 if (unlikely(r != 0)) 4939 return r; 4940 4941 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4942 if (unlikely(r != 0)) 4943 return r; 4944 4945 gfx_v8_0_kiq_init_queue(ring); 4946 amdgpu_bo_kunmap(ring->mqd_obj); 4947 ring->mqd_ptr = NULL; 4948 amdgpu_bo_unreserve(ring->mqd_obj); 4949 ring->ready = true; 4950 return 0; 4951 } 4952 4953 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev) 4954 { 4955 struct amdgpu_ring *ring = NULL; 4956 int r = 0, i; 4957 4958 gfx_v8_0_cp_compute_enable(adev, true); 4959 4960 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4961 ring = &adev->gfx.compute_ring[i]; 4962 4963 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4964 if (unlikely(r != 0)) 4965 goto done; 4966 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4967 if (!r) { 4968 r = gfx_v8_0_kcq_init_queue(ring); 4969 amdgpu_bo_kunmap(ring->mqd_obj); 4970 ring->mqd_ptr = NULL; 4971 } 4972 amdgpu_bo_unreserve(ring->mqd_obj); 4973 if (r) 4974 goto done; 4975 } 4976 4977 gfx_v8_0_set_mec_doorbell_range(adev); 4978 4979 r = gfx_v8_0_kiq_kcq_enable(adev); 4980 if (r) 4981 goto done; 4982 4983 /* Test KCQs */ 4984 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4985 ring = &adev->gfx.compute_ring[i]; 4986 ring->ready = true; 4987 r = amdgpu_ring_test_ring(ring); 4988 if (r) 4989 ring->ready = false; 4990 } 4991 4992 done: 4993 return r; 4994 } 4995 4996 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 4997 { 4998 int r; 4999 5000 if (!(adev->flags & AMD_IS_APU)) 5001 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5002 5003 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 5004 /* legacy firmware loading */ 5005 r = gfx_v8_0_cp_gfx_load_microcode(adev); 5006 if (r) 5007 return r; 5008 5009 r = gfx_v8_0_cp_compute_load_microcode(adev); 5010 if (r) 5011 return r; 5012 } 5013 5014 r = gfx_v8_0_kiq_resume(adev); 5015 if (r) 5016 return r; 5017 5018 r = gfx_v8_0_cp_gfx_resume(adev); 5019 if (r) 5020 return r; 5021 5022 r = gfx_v8_0_kcq_resume(adev); 5023 if (r) 5024 return r; 5025 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5026 5027 return 0; 5028 } 5029 5030 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 5031 { 5032 gfx_v8_0_cp_gfx_enable(adev, enable); 5033 gfx_v8_0_cp_compute_enable(adev, enable); 5034 } 5035 5036 static int gfx_v8_0_hw_init(void *handle) 5037 { 5038 int r; 5039 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5040 5041 gfx_v8_0_init_golden_registers(adev); 5042 gfx_v8_0_gpu_init(adev); 5043 5044 r = gfx_v8_0_rlc_resume(adev); 5045 if (r) 5046 return r; 5047 5048 r = gfx_v8_0_cp_resume(adev); 5049 5050 return r; 5051 } 5052 5053 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev) 5054 { 5055 int r, i; 5056 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 5057 5058 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); 5059 if (r) 5060 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 5061 5062 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5063 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5064 5065 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 5066 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 5067 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 5068 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 5069 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 5070 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 5071 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 5072 amdgpu_ring_write(kiq_ring, 0); 5073 amdgpu_ring_write(kiq_ring, 0); 5074 amdgpu_ring_write(kiq_ring, 0); 5075 } 5076 r = amdgpu_ring_test_ring(kiq_ring); 5077 if (r) 5078 DRM_ERROR("KCQ disable failed\n"); 5079 5080 return r; 5081 } 5082 5083 static int gfx_v8_0_hw_fini(void *handle) 5084 { 5085 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5086 5087 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5088 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5089 5090 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 5091 5092 amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0); 5093 5094 /* disable KCQ to avoid CPC touch memory not valid anymore */ 5095 gfx_v8_0_kcq_disable(adev); 5096 5097 if (amdgpu_sriov_vf(adev)) { 5098 pr_debug("For SRIOV client, shouldn't do anything.\n"); 5099 return 0; 5100 } 5101 gfx_v8_0_cp_enable(adev, false); 5102 gfx_v8_0_rlc_stop(adev); 5103 5104 return 0; 5105 } 5106 5107 static int gfx_v8_0_suspend(void *handle) 5108 { 5109 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5110 adev->gfx.in_suspend = true; 5111 return gfx_v8_0_hw_fini(adev); 5112 } 5113 5114 static int gfx_v8_0_resume(void *handle) 5115 { 5116 int r; 5117 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5118 5119 r = gfx_v8_0_hw_init(adev); 5120 adev->gfx.in_suspend = false; 5121 return r; 5122 } 5123 5124 static bool gfx_v8_0_is_idle(void *handle) 5125 { 5126 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5127 5128 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5129 return false; 5130 else 5131 return true; 5132 } 5133 5134 static int gfx_v8_0_wait_for_idle(void *handle) 5135 { 5136 unsigned i; 5137 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5138 5139 for (i = 0; i < adev->usec_timeout; i++) { 5140 if (gfx_v8_0_is_idle(handle)) 5141 return 0; 5142 5143 udelay(1); 5144 } 5145 return -ETIMEDOUT; 5146 } 5147 5148 static bool gfx_v8_0_check_soft_reset(void *handle) 5149 { 5150 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5151 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5152 u32 tmp; 5153 5154 /* GRBM_STATUS */ 5155 tmp = RREG32(mmGRBM_STATUS); 5156 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5157 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5158 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5159 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5160 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5161 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5162 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 5163 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5164 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 5165 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5166 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 5167 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5168 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5169 } 5170 5171 /* GRBM_STATUS2 */ 5172 tmp = RREG32(mmGRBM_STATUS2); 5173 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 5174 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5175 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 5176 5177 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 5178 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 5179 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 5180 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5181 SOFT_RESET_CPF, 1); 5182 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5183 SOFT_RESET_CPC, 1); 5184 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5185 SOFT_RESET_CPG, 1); 5186 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 5187 SOFT_RESET_GRBM, 1); 5188 } 5189 5190 /* SRBM_STATUS */ 5191 tmp = RREG32(mmSRBM_STATUS); 5192 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 5193 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5194 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5195 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 5196 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5197 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5198 5199 if (grbm_soft_reset || srbm_soft_reset) { 5200 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5201 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5202 return true; 5203 } else { 5204 adev->gfx.grbm_soft_reset = 0; 5205 adev->gfx.srbm_soft_reset = 0; 5206 return false; 5207 } 5208 } 5209 5210 static int gfx_v8_0_pre_soft_reset(void *handle) 5211 { 5212 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5213 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5214 5215 if ((!adev->gfx.grbm_soft_reset) && 5216 (!adev->gfx.srbm_soft_reset)) 5217 return 0; 5218 5219 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5220 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5221 5222 /* stop the rlc */ 5223 gfx_v8_0_rlc_stop(adev); 5224 5225 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5226 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5227 /* Disable GFX parsing/prefetching */ 5228 gfx_v8_0_cp_gfx_enable(adev, false); 5229 5230 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5231 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5232 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5233 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5234 int i; 5235 5236 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5237 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5238 5239 mutex_lock(&adev->srbm_mutex); 5240 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5241 gfx_v8_0_deactivate_hqd(adev, 2); 5242 vi_srbm_select(adev, 0, 0, 0, 0); 5243 mutex_unlock(&adev->srbm_mutex); 5244 } 5245 /* Disable MEC parsing/prefetching */ 5246 gfx_v8_0_cp_compute_enable(adev, false); 5247 } 5248 5249 return 0; 5250 } 5251 5252 static int gfx_v8_0_soft_reset(void *handle) 5253 { 5254 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5255 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5256 u32 tmp; 5257 5258 if ((!adev->gfx.grbm_soft_reset) && 5259 (!adev->gfx.srbm_soft_reset)) 5260 return 0; 5261 5262 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5263 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5264 5265 if (grbm_soft_reset || srbm_soft_reset) { 5266 tmp = RREG32(mmGMCON_DEBUG); 5267 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5268 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5269 WREG32(mmGMCON_DEBUG, tmp); 5270 udelay(50); 5271 } 5272 5273 if (grbm_soft_reset) { 5274 tmp = RREG32(mmGRBM_SOFT_RESET); 5275 tmp |= grbm_soft_reset; 5276 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5277 WREG32(mmGRBM_SOFT_RESET, tmp); 5278 tmp = RREG32(mmGRBM_SOFT_RESET); 5279 5280 udelay(50); 5281 5282 tmp &= ~grbm_soft_reset; 5283 WREG32(mmGRBM_SOFT_RESET, tmp); 5284 tmp = RREG32(mmGRBM_SOFT_RESET); 5285 } 5286 5287 if (srbm_soft_reset) { 5288 tmp = RREG32(mmSRBM_SOFT_RESET); 5289 tmp |= srbm_soft_reset; 5290 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5291 WREG32(mmSRBM_SOFT_RESET, tmp); 5292 tmp = RREG32(mmSRBM_SOFT_RESET); 5293 5294 udelay(50); 5295 5296 tmp &= ~srbm_soft_reset; 5297 WREG32(mmSRBM_SOFT_RESET, tmp); 5298 tmp = RREG32(mmSRBM_SOFT_RESET); 5299 } 5300 5301 if (grbm_soft_reset || srbm_soft_reset) { 5302 tmp = RREG32(mmGMCON_DEBUG); 5303 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5304 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5305 WREG32(mmGMCON_DEBUG, tmp); 5306 } 5307 5308 /* Wait a little for things to settle down */ 5309 udelay(50); 5310 5311 return 0; 5312 } 5313 5314 static int gfx_v8_0_post_soft_reset(void *handle) 5315 { 5316 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5317 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5318 5319 if ((!adev->gfx.grbm_soft_reset) && 5320 (!adev->gfx.srbm_soft_reset)) 5321 return 0; 5322 5323 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5324 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5325 5326 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5327 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5328 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5329 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5330 int i; 5331 5332 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5333 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5334 5335 mutex_lock(&adev->srbm_mutex); 5336 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5337 gfx_v8_0_deactivate_hqd(adev, 2); 5338 vi_srbm_select(adev, 0, 0, 0, 0); 5339 mutex_unlock(&adev->srbm_mutex); 5340 } 5341 gfx_v8_0_kiq_resume(adev); 5342 gfx_v8_0_kcq_resume(adev); 5343 } 5344 5345 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5346 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5347 gfx_v8_0_cp_gfx_resume(adev); 5348 5349 gfx_v8_0_rlc_start(adev); 5350 5351 return 0; 5352 } 5353 5354 /** 5355 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5356 * 5357 * @adev: amdgpu_device pointer 5358 * 5359 * Fetches a GPU clock counter snapshot. 5360 * Returns the 64 bit clock counter snapshot. 5361 */ 5362 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5363 { 5364 uint64_t clock; 5365 5366 mutex_lock(&adev->gfx.gpu_clock_mutex); 5367 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5368 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5369 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5370 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5371 return clock; 5372 } 5373 5374 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5375 uint32_t vmid, 5376 uint32_t gds_base, uint32_t gds_size, 5377 uint32_t gws_base, uint32_t gws_size, 5378 uint32_t oa_base, uint32_t oa_size) 5379 { 5380 /* GDS Base */ 5381 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5382 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5383 WRITE_DATA_DST_SEL(0))); 5384 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5385 amdgpu_ring_write(ring, 0); 5386 amdgpu_ring_write(ring, gds_base); 5387 5388 /* GDS Size */ 5389 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5390 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5391 WRITE_DATA_DST_SEL(0))); 5392 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5393 amdgpu_ring_write(ring, 0); 5394 amdgpu_ring_write(ring, gds_size); 5395 5396 /* GWS */ 5397 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5398 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5399 WRITE_DATA_DST_SEL(0))); 5400 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5401 amdgpu_ring_write(ring, 0); 5402 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5403 5404 /* OA */ 5405 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5406 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5407 WRITE_DATA_DST_SEL(0))); 5408 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5409 amdgpu_ring_write(ring, 0); 5410 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5411 } 5412 5413 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5414 { 5415 WREG32(mmSQ_IND_INDEX, 5416 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5417 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5418 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5419 (SQ_IND_INDEX__FORCE_READ_MASK)); 5420 return RREG32(mmSQ_IND_DATA); 5421 } 5422 5423 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5424 uint32_t wave, uint32_t thread, 5425 uint32_t regno, uint32_t num, uint32_t *out) 5426 { 5427 WREG32(mmSQ_IND_INDEX, 5428 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5429 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5430 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5431 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5432 (SQ_IND_INDEX__FORCE_READ_MASK) | 5433 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5434 while (num--) 5435 *(out++) = RREG32(mmSQ_IND_DATA); 5436 } 5437 5438 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5439 { 5440 /* type 0 wave data */ 5441 dst[(*no_fields)++] = 0; 5442 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5443 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5444 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 5445 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5446 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5447 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5448 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5449 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5450 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5451 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5452 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5453 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5454 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5455 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5456 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5457 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5458 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5459 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5460 } 5461 5462 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5463 uint32_t wave, uint32_t start, 5464 uint32_t size, uint32_t *dst) 5465 { 5466 wave_read_regs( 5467 adev, simd, wave, 0, 5468 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5469 } 5470 5471 5472 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5473 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5474 .select_se_sh = &gfx_v8_0_select_se_sh, 5475 .read_wave_data = &gfx_v8_0_read_wave_data, 5476 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5477 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q 5478 }; 5479 5480 static int gfx_v8_0_early_init(void *handle) 5481 { 5482 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5483 5484 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5485 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 5486 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5487 gfx_v8_0_set_ring_funcs(adev); 5488 gfx_v8_0_set_irq_funcs(adev); 5489 gfx_v8_0_set_gds_init(adev); 5490 gfx_v8_0_set_rlc_funcs(adev); 5491 5492 return 0; 5493 } 5494 5495 static int gfx_v8_0_late_init(void *handle) 5496 { 5497 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5498 int r; 5499 5500 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5501 if (r) 5502 return r; 5503 5504 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5505 if (r) 5506 return r; 5507 5508 /* requires IBs so do in late init after IB pool is initialized */ 5509 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5510 if (r) 5511 return r; 5512 5513 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 5514 if (r) { 5515 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r); 5516 return r; 5517 } 5518 5519 r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0); 5520 if (r) { 5521 DRM_ERROR( 5522 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n", 5523 r); 5524 return r; 5525 } 5526 5527 return 0; 5528 } 5529 5530 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5531 bool enable) 5532 { 5533 if (((adev->asic_type == CHIP_POLARIS11) || 5534 (adev->asic_type == CHIP_POLARIS12) || 5535 (adev->asic_type == CHIP_VEGAM)) && 5536 adev->powerplay.pp_funcs->set_powergating_by_smu) 5537 /* Send msg to SMU via Powerplay */ 5538 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable); 5539 5540 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5541 } 5542 5543 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5544 bool enable) 5545 { 5546 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0); 5547 } 5548 5549 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5550 bool enable) 5551 { 5552 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5553 } 5554 5555 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5556 bool enable) 5557 { 5558 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5559 } 5560 5561 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5562 bool enable) 5563 { 5564 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5565 5566 /* Read any GFX register to wake up GFX. */ 5567 if (!enable) 5568 RREG32(mmDB_RENDER_CONTROL); 5569 } 5570 5571 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5572 bool enable) 5573 { 5574 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5575 cz_enable_gfx_cg_power_gating(adev, true); 5576 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5577 cz_enable_gfx_pipeline_power_gating(adev, true); 5578 } else { 5579 cz_enable_gfx_cg_power_gating(adev, false); 5580 cz_enable_gfx_pipeline_power_gating(adev, false); 5581 } 5582 } 5583 5584 static int gfx_v8_0_set_powergating_state(void *handle, 5585 enum amd_powergating_state state) 5586 { 5587 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5588 bool enable = (state == AMD_PG_STATE_GATE); 5589 5590 if (amdgpu_sriov_vf(adev)) 5591 return 0; 5592 5593 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | 5594 AMD_PG_SUPPORT_RLC_SMU_HS | 5595 AMD_PG_SUPPORT_CP | 5596 AMD_PG_SUPPORT_GFX_DMG)) 5597 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5598 switch (adev->asic_type) { 5599 case CHIP_CARRIZO: 5600 case CHIP_STONEY: 5601 5602 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5603 cz_enable_sck_slow_down_on_power_up(adev, true); 5604 cz_enable_sck_slow_down_on_power_down(adev, true); 5605 } else { 5606 cz_enable_sck_slow_down_on_power_up(adev, false); 5607 cz_enable_sck_slow_down_on_power_down(adev, false); 5608 } 5609 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5610 cz_enable_cp_power_gating(adev, true); 5611 else 5612 cz_enable_cp_power_gating(adev, false); 5613 5614 cz_update_gfx_cg_power_gating(adev, enable); 5615 5616 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5617 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5618 else 5619 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5620 5621 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5622 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5623 else 5624 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5625 break; 5626 case CHIP_POLARIS11: 5627 case CHIP_POLARIS12: 5628 case CHIP_VEGAM: 5629 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5630 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5631 else 5632 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5633 5634 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5635 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5636 else 5637 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5638 5639 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) 5640 polaris11_enable_gfx_quick_mg_power_gating(adev, true); 5641 else 5642 polaris11_enable_gfx_quick_mg_power_gating(adev, false); 5643 break; 5644 default: 5645 break; 5646 } 5647 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | 5648 AMD_PG_SUPPORT_RLC_SMU_HS | 5649 AMD_PG_SUPPORT_CP | 5650 AMD_PG_SUPPORT_GFX_DMG)) 5651 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5652 return 0; 5653 } 5654 5655 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) 5656 { 5657 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5658 int data; 5659 5660 if (amdgpu_sriov_vf(adev)) 5661 *flags = 0; 5662 5663 /* AMD_CG_SUPPORT_GFX_MGCG */ 5664 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5665 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) 5666 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5667 5668 /* AMD_CG_SUPPORT_GFX_CGLG */ 5669 data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5670 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5671 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5672 5673 /* AMD_CG_SUPPORT_GFX_CGLS */ 5674 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5675 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5676 5677 /* AMD_CG_SUPPORT_GFX_CGTS */ 5678 data = RREG32(mmCGTS_SM_CTRL_REG); 5679 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK)) 5680 *flags |= AMD_CG_SUPPORT_GFX_CGTS; 5681 5682 /* AMD_CG_SUPPORT_GFX_CGTS_LS */ 5683 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK)) 5684 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS; 5685 5686 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5687 data = RREG32(mmRLC_MEM_SLP_CNTL); 5688 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5689 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5690 5691 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5692 data = RREG32(mmCP_MEM_SLP_CNTL); 5693 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5694 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5695 } 5696 5697 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5698 uint32_t reg_addr, uint32_t cmd) 5699 { 5700 uint32_t data; 5701 5702 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5703 5704 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5705 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5706 5707 data = RREG32(mmRLC_SERDES_WR_CTRL); 5708 if (adev->asic_type == CHIP_STONEY) 5709 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5710 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5711 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5712 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5713 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5714 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5715 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5716 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5717 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5718 else 5719 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5720 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5721 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5722 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5723 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5724 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5725 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5726 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5727 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5728 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5729 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5730 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5731 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5732 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5733 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5734 5735 WREG32(mmRLC_SERDES_WR_CTRL, data); 5736 } 5737 5738 #define MSG_ENTER_RLC_SAFE_MODE 1 5739 #define MSG_EXIT_RLC_SAFE_MODE 0 5740 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5741 #define RLC_GPR_REG2__REQ__SHIFT 0 5742 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5743 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5744 5745 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5746 { 5747 u32 data; 5748 unsigned i; 5749 5750 data = RREG32(mmRLC_CNTL); 5751 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5752 return; 5753 5754 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5755 data |= RLC_SAFE_MODE__CMD_MASK; 5756 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5757 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5758 WREG32(mmRLC_SAFE_MODE, data); 5759 5760 for (i = 0; i < adev->usec_timeout; i++) { 5761 if ((RREG32(mmRLC_GPM_STAT) & 5762 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5763 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5764 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5765 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5766 break; 5767 udelay(1); 5768 } 5769 5770 for (i = 0; i < adev->usec_timeout; i++) { 5771 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5772 break; 5773 udelay(1); 5774 } 5775 adev->gfx.rlc.in_safe_mode = true; 5776 } 5777 } 5778 5779 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5780 { 5781 u32 data = 0; 5782 unsigned i; 5783 5784 data = RREG32(mmRLC_CNTL); 5785 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5786 return; 5787 5788 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5789 if (adev->gfx.rlc.in_safe_mode) { 5790 data |= RLC_SAFE_MODE__CMD_MASK; 5791 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5792 WREG32(mmRLC_SAFE_MODE, data); 5793 adev->gfx.rlc.in_safe_mode = false; 5794 } 5795 } 5796 5797 for (i = 0; i < adev->usec_timeout; i++) { 5798 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5799 break; 5800 udelay(1); 5801 } 5802 } 5803 5804 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5805 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5806 .exit_safe_mode = iceland_exit_rlc_safe_mode 5807 }; 5808 5809 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5810 bool enable) 5811 { 5812 uint32_t temp, data; 5813 5814 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5815 5816 /* It is disabled by HW by default */ 5817 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5818 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5819 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 5820 /* 1 - RLC memory Light sleep */ 5821 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 5822 5823 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 5824 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 5825 } 5826 5827 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5828 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5829 if (adev->flags & AMD_IS_APU) 5830 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5831 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5832 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5833 else 5834 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5835 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5836 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5837 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5838 5839 if (temp != data) 5840 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5841 5842 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5843 gfx_v8_0_wait_for_rlc_serdes(adev); 5844 5845 /* 5 - clear mgcg override */ 5846 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5847 5848 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5849 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 5850 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5851 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5852 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5853 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 5854 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 5855 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 5856 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 5857 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 5858 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 5859 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 5860 if (temp != data) 5861 WREG32(mmCGTS_SM_CTRL_REG, data); 5862 } 5863 udelay(50); 5864 5865 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5866 gfx_v8_0_wait_for_rlc_serdes(adev); 5867 } else { 5868 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 5869 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5870 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5871 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5872 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5873 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5874 if (temp != data) 5875 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5876 5877 /* 2 - disable MGLS in RLC */ 5878 data = RREG32(mmRLC_MEM_SLP_CNTL); 5879 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5880 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5881 WREG32(mmRLC_MEM_SLP_CNTL, data); 5882 } 5883 5884 /* 3 - disable MGLS in CP */ 5885 data = RREG32(mmCP_MEM_SLP_CNTL); 5886 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5887 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5888 WREG32(mmCP_MEM_SLP_CNTL, data); 5889 } 5890 5891 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 5892 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5893 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 5894 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 5895 if (temp != data) 5896 WREG32(mmCGTS_SM_CTRL_REG, data); 5897 5898 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5899 gfx_v8_0_wait_for_rlc_serdes(adev); 5900 5901 /* 6 - set mgcg override */ 5902 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5903 5904 udelay(50); 5905 5906 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5907 gfx_v8_0_wait_for_rlc_serdes(adev); 5908 } 5909 5910 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5911 } 5912 5913 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5914 bool enable) 5915 { 5916 uint32_t temp, temp1, data, data1; 5917 5918 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5919 5920 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5921 5922 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5923 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5924 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 5925 if (temp1 != data1) 5926 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5927 5928 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5929 gfx_v8_0_wait_for_rlc_serdes(adev); 5930 5931 /* 2 - clear cgcg override */ 5932 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5933 5934 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5935 gfx_v8_0_wait_for_rlc_serdes(adev); 5936 5937 /* 3 - write cmd to set CGLS */ 5938 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 5939 5940 /* 4 - enable cgcg */ 5941 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5942 5943 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5944 /* enable cgls*/ 5945 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5946 5947 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5948 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 5949 5950 if (temp1 != data1) 5951 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5952 } else { 5953 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5954 } 5955 5956 if (temp != data) 5957 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5958 5959 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/ 5960 * Cmp_busy/GFX_Idle interrupts 5961 */ 5962 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5963 } else { 5964 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 5965 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5966 5967 /* TEST CGCG */ 5968 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5969 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 5970 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 5971 if (temp1 != data1) 5972 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5973 5974 /* read gfx register to wake up cgcg */ 5975 RREG32(mmCB_CGTT_SCLK_CTRL); 5976 RREG32(mmCB_CGTT_SCLK_CTRL); 5977 RREG32(mmCB_CGTT_SCLK_CTRL); 5978 RREG32(mmCB_CGTT_SCLK_CTRL); 5979 5980 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5981 gfx_v8_0_wait_for_rlc_serdes(adev); 5982 5983 /* write cmd to Set CGCG Overrride */ 5984 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5985 5986 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5987 gfx_v8_0_wait_for_rlc_serdes(adev); 5988 5989 /* write cmd to Clear CGLS */ 5990 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 5991 5992 /* disable cgcg, cgls should be disabled too. */ 5993 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 5994 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5995 if (temp != data) 5996 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5997 /* enable interrupts again for PG */ 5998 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5999 } 6000 6001 gfx_v8_0_wait_for_rlc_serdes(adev); 6002 6003 adev->gfx.rlc.funcs->exit_safe_mode(adev); 6004 } 6005 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 6006 bool enable) 6007 { 6008 if (enable) { 6009 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 6010 * === MGCG + MGLS + TS(CG/LS) === 6011 */ 6012 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6013 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6014 } else { 6015 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 6016 * === CGCG + CGLS === 6017 */ 6018 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6019 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6020 } 6021 return 0; 6022 } 6023 6024 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 6025 enum amd_clockgating_state state) 6026 { 6027 uint32_t msg_id, pp_state = 0; 6028 uint32_t pp_support_state = 0; 6029 6030 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6031 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6032 pp_support_state = PP_STATE_SUPPORT_LS; 6033 pp_state = PP_STATE_LS; 6034 } 6035 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6036 pp_support_state |= PP_STATE_SUPPORT_CG; 6037 pp_state |= PP_STATE_CG; 6038 } 6039 if (state == AMD_CG_STATE_UNGATE) 6040 pp_state = 0; 6041 6042 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6043 PP_BLOCK_GFX_CG, 6044 pp_support_state, 6045 pp_state); 6046 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6047 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6048 } 6049 6050 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6051 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6052 pp_support_state = PP_STATE_SUPPORT_LS; 6053 pp_state = PP_STATE_LS; 6054 } 6055 6056 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6057 pp_support_state |= PP_STATE_SUPPORT_CG; 6058 pp_state |= PP_STATE_CG; 6059 } 6060 6061 if (state == AMD_CG_STATE_UNGATE) 6062 pp_state = 0; 6063 6064 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6065 PP_BLOCK_GFX_MG, 6066 pp_support_state, 6067 pp_state); 6068 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6069 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6070 } 6071 6072 return 0; 6073 } 6074 6075 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 6076 enum amd_clockgating_state state) 6077 { 6078 6079 uint32_t msg_id, pp_state = 0; 6080 uint32_t pp_support_state = 0; 6081 6082 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6083 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6084 pp_support_state = PP_STATE_SUPPORT_LS; 6085 pp_state = PP_STATE_LS; 6086 } 6087 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6088 pp_support_state |= PP_STATE_SUPPORT_CG; 6089 pp_state |= PP_STATE_CG; 6090 } 6091 if (state == AMD_CG_STATE_UNGATE) 6092 pp_state = 0; 6093 6094 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6095 PP_BLOCK_GFX_CG, 6096 pp_support_state, 6097 pp_state); 6098 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6099 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6100 } 6101 6102 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 6103 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 6104 pp_support_state = PP_STATE_SUPPORT_LS; 6105 pp_state = PP_STATE_LS; 6106 } 6107 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 6108 pp_support_state |= PP_STATE_SUPPORT_CG; 6109 pp_state |= PP_STATE_CG; 6110 } 6111 if (state == AMD_CG_STATE_UNGATE) 6112 pp_state = 0; 6113 6114 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6115 PP_BLOCK_GFX_3D, 6116 pp_support_state, 6117 pp_state); 6118 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6119 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6120 } 6121 6122 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6123 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6124 pp_support_state = PP_STATE_SUPPORT_LS; 6125 pp_state = PP_STATE_LS; 6126 } 6127 6128 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6129 pp_support_state |= PP_STATE_SUPPORT_CG; 6130 pp_state |= PP_STATE_CG; 6131 } 6132 6133 if (state == AMD_CG_STATE_UNGATE) 6134 pp_state = 0; 6135 6136 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6137 PP_BLOCK_GFX_MG, 6138 pp_support_state, 6139 pp_state); 6140 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6141 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6142 } 6143 6144 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 6145 pp_support_state = PP_STATE_SUPPORT_LS; 6146 6147 if (state == AMD_CG_STATE_UNGATE) 6148 pp_state = 0; 6149 else 6150 pp_state = PP_STATE_LS; 6151 6152 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6153 PP_BLOCK_GFX_RLC, 6154 pp_support_state, 6155 pp_state); 6156 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6157 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6158 } 6159 6160 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 6161 pp_support_state = PP_STATE_SUPPORT_LS; 6162 6163 if (state == AMD_CG_STATE_UNGATE) 6164 pp_state = 0; 6165 else 6166 pp_state = PP_STATE_LS; 6167 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6168 PP_BLOCK_GFX_CP, 6169 pp_support_state, 6170 pp_state); 6171 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6172 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6173 } 6174 6175 return 0; 6176 } 6177 6178 static int gfx_v8_0_set_clockgating_state(void *handle, 6179 enum amd_clockgating_state state) 6180 { 6181 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6182 6183 if (amdgpu_sriov_vf(adev)) 6184 return 0; 6185 6186 switch (adev->asic_type) { 6187 case CHIP_FIJI: 6188 case CHIP_CARRIZO: 6189 case CHIP_STONEY: 6190 gfx_v8_0_update_gfx_clock_gating(adev, 6191 state == AMD_CG_STATE_GATE); 6192 break; 6193 case CHIP_TONGA: 6194 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6195 break; 6196 case CHIP_POLARIS10: 6197 case CHIP_POLARIS11: 6198 case CHIP_POLARIS12: 6199 case CHIP_VEGAM: 6200 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6201 break; 6202 default: 6203 break; 6204 } 6205 return 0; 6206 } 6207 6208 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6209 { 6210 return ring->adev->wb.wb[ring->rptr_offs]; 6211 } 6212 6213 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6214 { 6215 struct amdgpu_device *adev = ring->adev; 6216 6217 if (ring->use_doorbell) 6218 /* XXX check if swapping is necessary on BE */ 6219 return ring->adev->wb.wb[ring->wptr_offs]; 6220 else 6221 return RREG32(mmCP_RB0_WPTR); 6222 } 6223 6224 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 6225 { 6226 struct amdgpu_device *adev = ring->adev; 6227 6228 if (ring->use_doorbell) { 6229 /* XXX check if swapping is necessary on BE */ 6230 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6231 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6232 } else { 6233 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 6234 (void)RREG32(mmCP_RB0_WPTR); 6235 } 6236 } 6237 6238 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 6239 { 6240 u32 ref_and_mask, reg_mem_engine; 6241 6242 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || 6243 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { 6244 switch (ring->me) { 6245 case 1: 6246 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6247 break; 6248 case 2: 6249 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6250 break; 6251 default: 6252 return; 6253 } 6254 reg_mem_engine = 0; 6255 } else { 6256 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6257 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6258 } 6259 6260 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6261 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6262 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6263 reg_mem_engine)); 6264 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 6265 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6266 amdgpu_ring_write(ring, ref_and_mask); 6267 amdgpu_ring_write(ring, ref_and_mask); 6268 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6269 } 6270 6271 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) 6272 { 6273 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6274 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) | 6275 EVENT_INDEX(4)); 6276 6277 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6278 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) | 6279 EVENT_INDEX(0)); 6280 } 6281 6282 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6283 struct amdgpu_ib *ib, 6284 unsigned vmid, bool ctx_switch) 6285 { 6286 u32 header, control = 0; 6287 6288 if (ib->flags & AMDGPU_IB_FLAG_CE) 6289 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 6290 else 6291 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 6292 6293 control |= ib->length_dw | (vmid << 24); 6294 6295 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 6296 control |= INDIRECT_BUFFER_PRE_ENB(1); 6297 6298 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 6299 gfx_v8_0_ring_emit_de_meta(ring); 6300 } 6301 6302 amdgpu_ring_write(ring, header); 6303 amdgpu_ring_write(ring, 6304 #ifdef __BIG_ENDIAN 6305 (2 << 0) | 6306 #endif 6307 (ib->gpu_addr & 0xFFFFFFFC)); 6308 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6309 amdgpu_ring_write(ring, control); 6310 } 6311 6312 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 6313 struct amdgpu_ib *ib, 6314 unsigned vmid, bool ctx_switch) 6315 { 6316 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 6317 6318 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 6319 amdgpu_ring_write(ring, 6320 #ifdef __BIG_ENDIAN 6321 (2 << 0) | 6322 #endif 6323 (ib->gpu_addr & 0xFFFFFFFC)); 6324 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6325 amdgpu_ring_write(ring, control); 6326 } 6327 6328 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, 6329 u64 seq, unsigned flags) 6330 { 6331 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6332 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6333 6334 /* EVENT_WRITE_EOP - flush caches, send int */ 6335 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 6336 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6337 EOP_TC_ACTION_EN | 6338 EOP_TC_WB_ACTION_EN | 6339 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6340 EVENT_INDEX(5))); 6341 amdgpu_ring_write(ring, addr & 0xfffffffc); 6342 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 6343 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6344 amdgpu_ring_write(ring, lower_32_bits(seq)); 6345 amdgpu_ring_write(ring, upper_32_bits(seq)); 6346 6347 } 6348 6349 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 6350 { 6351 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6352 uint32_t seq = ring->fence_drv.sync_seq; 6353 uint64_t addr = ring->fence_drv.gpu_addr; 6354 6355 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6356 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ 6357 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 6358 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ 6359 amdgpu_ring_write(ring, addr & 0xfffffffc); 6360 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 6361 amdgpu_ring_write(ring, seq); 6362 amdgpu_ring_write(ring, 0xffffffff); 6363 amdgpu_ring_write(ring, 4); /* poll interval */ 6364 } 6365 6366 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 6367 unsigned vmid, uint64_t pd_addr) 6368 { 6369 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6370 6371 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 6372 6373 /* wait for the invalidate to complete */ 6374 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6375 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ 6376 WAIT_REG_MEM_FUNCTION(0) | /* always */ 6377 WAIT_REG_MEM_ENGINE(0))); /* me */ 6378 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 6379 amdgpu_ring_write(ring, 0); 6380 amdgpu_ring_write(ring, 0); /* ref */ 6381 amdgpu_ring_write(ring, 0); /* mask */ 6382 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6383 6384 /* compute doesn't have PFP */ 6385 if (usepfp) { 6386 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 6387 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 6388 amdgpu_ring_write(ring, 0x0); 6389 } 6390 } 6391 6392 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 6393 { 6394 return ring->adev->wb.wb[ring->wptr_offs]; 6395 } 6396 6397 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 6398 { 6399 struct amdgpu_device *adev = ring->adev; 6400 6401 /* XXX check if swapping is necessary on BE */ 6402 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6403 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6404 } 6405 6406 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 6407 bool acquire) 6408 { 6409 struct amdgpu_device *adev = ring->adev; 6410 int pipe_num, tmp, reg; 6411 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 6412 6413 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 6414 6415 /* first me only has 2 entries, GFX and HP3D */ 6416 if (ring->me > 0) 6417 pipe_num -= 2; 6418 6419 reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num; 6420 tmp = RREG32(reg); 6421 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 6422 WREG32(reg, tmp); 6423 } 6424 6425 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, 6426 struct amdgpu_ring *ring, 6427 bool acquire) 6428 { 6429 int i, pipe; 6430 bool reserve; 6431 struct amdgpu_ring *iring; 6432 6433 mutex_lock(&adev->gfx.pipe_reserve_mutex); 6434 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); 6435 if (acquire) 6436 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6437 else 6438 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6439 6440 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 6441 /* Clear all reservations - everyone reacquires all resources */ 6442 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 6443 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 6444 true); 6445 6446 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 6447 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 6448 true); 6449 } else { 6450 /* Lower all pipes without a current reservation */ 6451 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 6452 iring = &adev->gfx.gfx_ring[i]; 6453 pipe = amdgpu_gfx_queue_to_bit(adev, 6454 iring->me, 6455 iring->pipe, 6456 0); 6457 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6458 gfx_v8_0_ring_set_pipe_percent(iring, reserve); 6459 } 6460 6461 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 6462 iring = &adev->gfx.compute_ring[i]; 6463 pipe = amdgpu_gfx_queue_to_bit(adev, 6464 iring->me, 6465 iring->pipe, 6466 0); 6467 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6468 gfx_v8_0_ring_set_pipe_percent(iring, reserve); 6469 } 6470 } 6471 6472 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 6473 } 6474 6475 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev, 6476 struct amdgpu_ring *ring, 6477 bool acquire) 6478 { 6479 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 6480 uint32_t queue_priority = acquire ? 0xf : 0x0; 6481 6482 mutex_lock(&adev->srbm_mutex); 6483 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6484 6485 WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority); 6486 WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority); 6487 6488 vi_srbm_select(adev, 0, 0, 0, 0); 6489 mutex_unlock(&adev->srbm_mutex); 6490 } 6491 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring, 6492 enum drm_sched_priority priority) 6493 { 6494 struct amdgpu_device *adev = ring->adev; 6495 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 6496 6497 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 6498 return; 6499 6500 gfx_v8_0_hqd_set_priority(adev, ring, acquire); 6501 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire); 6502 } 6503 6504 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, 6505 u64 addr, u64 seq, 6506 unsigned flags) 6507 { 6508 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6509 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6510 6511 /* RELEASE_MEM - flush caches, send int */ 6512 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5)); 6513 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6514 EOP_TC_ACTION_EN | 6515 EOP_TC_WB_ACTION_EN | 6516 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6517 EVENT_INDEX(5))); 6518 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6519 amdgpu_ring_write(ring, addr & 0xfffffffc); 6520 amdgpu_ring_write(ring, upper_32_bits(addr)); 6521 amdgpu_ring_write(ring, lower_32_bits(seq)); 6522 amdgpu_ring_write(ring, upper_32_bits(seq)); 6523 } 6524 6525 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 6526 u64 seq, unsigned int flags) 6527 { 6528 /* we only allocate 32bit for each seq wb address */ 6529 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 6530 6531 /* write fence seq to the "addr" */ 6532 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6533 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6534 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 6535 amdgpu_ring_write(ring, lower_32_bits(addr)); 6536 amdgpu_ring_write(ring, upper_32_bits(addr)); 6537 amdgpu_ring_write(ring, lower_32_bits(seq)); 6538 6539 if (flags & AMDGPU_FENCE_FLAG_INT) { 6540 /* set register to trigger INT */ 6541 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6542 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6543 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 6544 amdgpu_ring_write(ring, mmCPC_INT_STATUS); 6545 amdgpu_ring_write(ring, 0); 6546 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 6547 } 6548 } 6549 6550 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) 6551 { 6552 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 6553 amdgpu_ring_write(ring, 0); 6554 } 6555 6556 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 6557 { 6558 uint32_t dw2 = 0; 6559 6560 if (amdgpu_sriov_vf(ring->adev)) 6561 gfx_v8_0_ring_emit_ce_meta(ring); 6562 6563 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 6564 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 6565 gfx_v8_0_ring_emit_vgt_flush(ring); 6566 /* set load_global_config & load_global_uconfig */ 6567 dw2 |= 0x8001; 6568 /* set load_cs_sh_regs */ 6569 dw2 |= 0x01000000; 6570 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 6571 dw2 |= 0x10002; 6572 6573 /* set load_ce_ram if preamble presented */ 6574 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 6575 dw2 |= 0x10000000; 6576 } else { 6577 /* still load_ce_ram if this is the first time preamble presented 6578 * although there is no context switch happens. 6579 */ 6580 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 6581 dw2 |= 0x10000000; 6582 } 6583 6584 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 6585 amdgpu_ring_write(ring, dw2); 6586 amdgpu_ring_write(ring, 0); 6587 } 6588 6589 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 6590 { 6591 unsigned ret; 6592 6593 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 6594 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 6595 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 6596 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 6597 ret = ring->wptr & ring->buf_mask; 6598 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 6599 return ret; 6600 } 6601 6602 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 6603 { 6604 unsigned cur; 6605 6606 BUG_ON(offset > ring->buf_mask); 6607 BUG_ON(ring->ring[offset] != 0x55aa55aa); 6608 6609 cur = (ring->wptr & ring->buf_mask) - 1; 6610 if (likely(cur > offset)) 6611 ring->ring[offset] = cur - offset; 6612 else 6613 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; 6614 } 6615 6616 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 6617 { 6618 struct amdgpu_device *adev = ring->adev; 6619 6620 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6621 amdgpu_ring_write(ring, 0 | /* src: register*/ 6622 (5 << 8) | /* dst: memory */ 6623 (1 << 20)); /* write confirm */ 6624 amdgpu_ring_write(ring, reg); 6625 amdgpu_ring_write(ring, 0); 6626 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 6627 adev->virt.reg_val_offs * 4)); 6628 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 6629 adev->virt.reg_val_offs * 4)); 6630 } 6631 6632 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6633 uint32_t val) 6634 { 6635 uint32_t cmd; 6636 6637 switch (ring->funcs->type) { 6638 case AMDGPU_RING_TYPE_GFX: 6639 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 6640 break; 6641 case AMDGPU_RING_TYPE_KIQ: 6642 cmd = 1 << 16; /* no inc addr */ 6643 break; 6644 default: 6645 cmd = WR_CONFIRM; 6646 break; 6647 } 6648 6649 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6650 amdgpu_ring_write(ring, cmd); 6651 amdgpu_ring_write(ring, reg); 6652 amdgpu_ring_write(ring, 0); 6653 amdgpu_ring_write(ring, val); 6654 } 6655 6656 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 6657 { 6658 struct amdgpu_device *adev = ring->adev; 6659 uint32_t value = 0; 6660 6661 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 6662 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 6663 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 6664 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 6665 WREG32(mmSQ_CMD, value); 6666 } 6667 6668 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6669 enum amdgpu_interrupt_state state) 6670 { 6671 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE, 6672 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6673 } 6674 6675 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6676 int me, int pipe, 6677 enum amdgpu_interrupt_state state) 6678 { 6679 u32 mec_int_cntl, mec_int_cntl_reg; 6680 6681 /* 6682 * amdgpu controls only the first MEC. That's why this function only 6683 * handles the setting of interrupts for this specific MEC. All other 6684 * pipes' interrupts are set by amdkfd. 6685 */ 6686 6687 if (me == 1) { 6688 switch (pipe) { 6689 case 0: 6690 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; 6691 break; 6692 case 1: 6693 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL; 6694 break; 6695 case 2: 6696 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL; 6697 break; 6698 case 3: 6699 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL; 6700 break; 6701 default: 6702 DRM_DEBUG("invalid pipe %d\n", pipe); 6703 return; 6704 } 6705 } else { 6706 DRM_DEBUG("invalid me %d\n", me); 6707 return; 6708 } 6709 6710 switch (state) { 6711 case AMDGPU_IRQ_STATE_DISABLE: 6712 mec_int_cntl = RREG32(mec_int_cntl_reg); 6713 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; 6714 WREG32(mec_int_cntl_reg, mec_int_cntl); 6715 break; 6716 case AMDGPU_IRQ_STATE_ENABLE: 6717 mec_int_cntl = RREG32(mec_int_cntl_reg); 6718 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; 6719 WREG32(mec_int_cntl_reg, mec_int_cntl); 6720 break; 6721 default: 6722 break; 6723 } 6724 } 6725 6726 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6727 struct amdgpu_irq_src *source, 6728 unsigned type, 6729 enum amdgpu_interrupt_state state) 6730 { 6731 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, 6732 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6733 6734 return 0; 6735 } 6736 6737 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6738 struct amdgpu_irq_src *source, 6739 unsigned type, 6740 enum amdgpu_interrupt_state state) 6741 { 6742 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, 6743 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6744 6745 return 0; 6746 } 6747 6748 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6749 struct amdgpu_irq_src *src, 6750 unsigned type, 6751 enum amdgpu_interrupt_state state) 6752 { 6753 switch (type) { 6754 case AMDGPU_CP_IRQ_GFX_EOP: 6755 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state); 6756 break; 6757 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6758 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6759 break; 6760 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6761 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6762 break; 6763 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6764 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6765 break; 6766 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6767 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6768 break; 6769 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6770 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6771 break; 6772 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6773 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6774 break; 6775 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6776 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6777 break; 6778 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6779 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6780 break; 6781 default: 6782 break; 6783 } 6784 return 0; 6785 } 6786 6787 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev, 6788 struct amdgpu_irq_src *source, 6789 unsigned int type, 6790 enum amdgpu_interrupt_state state) 6791 { 6792 int enable_flag; 6793 6794 switch (state) { 6795 case AMDGPU_IRQ_STATE_DISABLE: 6796 enable_flag = 0; 6797 break; 6798 6799 case AMDGPU_IRQ_STATE_ENABLE: 6800 enable_flag = 1; 6801 break; 6802 6803 default: 6804 return -EINVAL; 6805 } 6806 6807 WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6808 WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6809 WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6810 WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6811 WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6812 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6813 enable_flag); 6814 WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6815 enable_flag); 6816 WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6817 enable_flag); 6818 WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6819 enable_flag); 6820 WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6821 enable_flag); 6822 WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6823 enable_flag); 6824 WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6825 enable_flag); 6826 WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6827 enable_flag); 6828 6829 return 0; 6830 } 6831 6832 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev, 6833 struct amdgpu_irq_src *source, 6834 unsigned int type, 6835 enum amdgpu_interrupt_state state) 6836 { 6837 int enable_flag; 6838 6839 switch (state) { 6840 case AMDGPU_IRQ_STATE_DISABLE: 6841 enable_flag = 1; 6842 break; 6843 6844 case AMDGPU_IRQ_STATE_ENABLE: 6845 enable_flag = 0; 6846 break; 6847 6848 default: 6849 return -EINVAL; 6850 } 6851 6852 WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL, 6853 enable_flag); 6854 6855 return 0; 6856 } 6857 6858 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, 6859 struct amdgpu_irq_src *source, 6860 struct amdgpu_iv_entry *entry) 6861 { 6862 int i; 6863 u8 me_id, pipe_id, queue_id; 6864 struct amdgpu_ring *ring; 6865 6866 DRM_DEBUG("IH: CP EOP\n"); 6867 me_id = (entry->ring_id & 0x0c) >> 2; 6868 pipe_id = (entry->ring_id & 0x03) >> 0; 6869 queue_id = (entry->ring_id & 0x70) >> 4; 6870 6871 switch (me_id) { 6872 case 0: 6873 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6874 break; 6875 case 1: 6876 case 2: 6877 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6878 ring = &adev->gfx.compute_ring[i]; 6879 /* Per-queue interrupt is supported for MEC starting from VI. 6880 * The interrupt can only be enabled/disabled per pipe instead of per queue. 6881 */ 6882 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6883 amdgpu_fence_process(ring); 6884 } 6885 break; 6886 } 6887 return 0; 6888 } 6889 6890 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, 6891 struct amdgpu_irq_src *source, 6892 struct amdgpu_iv_entry *entry) 6893 { 6894 DRM_ERROR("Illegal register access in command stream\n"); 6895 schedule_work(&adev->reset_work); 6896 return 0; 6897 } 6898 6899 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, 6900 struct amdgpu_irq_src *source, 6901 struct amdgpu_iv_entry *entry) 6902 { 6903 DRM_ERROR("Illegal instruction in command stream\n"); 6904 schedule_work(&adev->reset_work); 6905 return 0; 6906 } 6907 6908 static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev, 6909 struct amdgpu_irq_src *source, 6910 struct amdgpu_iv_entry *entry) 6911 { 6912 DRM_ERROR("CP EDC/ECC error detected."); 6913 return 0; 6914 } 6915 6916 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data) 6917 { 6918 u32 enc, se_id, sh_id, cu_id; 6919 char type[20]; 6920 int sq_edc_source = -1; 6921 6922 enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING); 6923 se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID); 6924 6925 switch (enc) { 6926 case 0: 6927 DRM_INFO("SQ general purpose intr detected:" 6928 "se_id %d, immed_overflow %d, host_reg_overflow %d," 6929 "host_cmd_overflow %d, cmd_timestamp %d," 6930 "reg_timestamp %d, thread_trace_buff_full %d," 6931 "wlt %d, thread_trace %d.\n", 6932 se_id, 6933 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW), 6934 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW), 6935 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW), 6936 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP), 6937 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP), 6938 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL), 6939 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT), 6940 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE) 6941 ); 6942 break; 6943 case 1: 6944 case 2: 6945 6946 cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID); 6947 sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID); 6948 6949 /* 6950 * This function can be called either directly from ISR 6951 * or from BH in which case we can access SQ_EDC_INFO 6952 * instance 6953 */ 6954 if (in_task()) { 6955 mutex_lock(&adev->grbm_idx_mutex); 6956 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id); 6957 6958 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE); 6959 6960 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 6961 mutex_unlock(&adev->grbm_idx_mutex); 6962 } 6963 6964 if (enc == 1) 6965 sprintf(type, "instruction intr"); 6966 else 6967 sprintf(type, "EDC/ECC error"); 6968 6969 DRM_INFO( 6970 "SQ %s detected: " 6971 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d " 6972 "trap %s, sq_ed_info.source %s.\n", 6973 type, se_id, sh_id, cu_id, 6974 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID), 6975 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID), 6976 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID), 6977 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false", 6978 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable" 6979 ); 6980 break; 6981 default: 6982 DRM_ERROR("SQ invalid encoding type\n."); 6983 } 6984 } 6985 6986 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work) 6987 { 6988 6989 struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work); 6990 struct sq_work *sq_work = container_of(work, struct sq_work, work); 6991 6992 gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data); 6993 } 6994 6995 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev, 6996 struct amdgpu_irq_src *source, 6997 struct amdgpu_iv_entry *entry) 6998 { 6999 unsigned ih_data = entry->src_data[0]; 7000 7001 /* 7002 * Try to submit work so SQ_EDC_INFO can be accessed from 7003 * BH. If previous work submission hasn't finished yet 7004 * just print whatever info is possible directly from the ISR. 7005 */ 7006 if (work_pending(&adev->gfx.sq_work.work)) { 7007 gfx_v8_0_parse_sq_irq(adev, ih_data); 7008 } else { 7009 adev->gfx.sq_work.ih_data = ih_data; 7010 schedule_work(&adev->gfx.sq_work.work); 7011 } 7012 7013 return 0; 7014 } 7015 7016 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 7017 struct amdgpu_irq_src *src, 7018 unsigned int type, 7019 enum amdgpu_interrupt_state state) 7020 { 7021 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 7022 7023 switch (type) { 7024 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 7025 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE, 7026 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 7027 if (ring->me == 1) 7028 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL, 7029 ring->pipe, 7030 GENERIC2_INT_ENABLE, 7031 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 7032 else 7033 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL, 7034 ring->pipe, 7035 GENERIC2_INT_ENABLE, 7036 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 7037 break; 7038 default: 7039 BUG(); /* kiq only support GENERIC2_INT now */ 7040 break; 7041 } 7042 return 0; 7043 } 7044 7045 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev, 7046 struct amdgpu_irq_src *source, 7047 struct amdgpu_iv_entry *entry) 7048 { 7049 u8 me_id, pipe_id, queue_id; 7050 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 7051 7052 me_id = (entry->ring_id & 0x0c) >> 2; 7053 pipe_id = (entry->ring_id & 0x03) >> 0; 7054 queue_id = (entry->ring_id & 0x70) >> 4; 7055 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", 7056 me_id, pipe_id, queue_id); 7057 7058 amdgpu_fence_process(ring); 7059 return 0; 7060 } 7061 7062 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { 7063 .name = "gfx_v8_0", 7064 .early_init = gfx_v8_0_early_init, 7065 .late_init = gfx_v8_0_late_init, 7066 .sw_init = gfx_v8_0_sw_init, 7067 .sw_fini = gfx_v8_0_sw_fini, 7068 .hw_init = gfx_v8_0_hw_init, 7069 .hw_fini = gfx_v8_0_hw_fini, 7070 .suspend = gfx_v8_0_suspend, 7071 .resume = gfx_v8_0_resume, 7072 .is_idle = gfx_v8_0_is_idle, 7073 .wait_for_idle = gfx_v8_0_wait_for_idle, 7074 .check_soft_reset = gfx_v8_0_check_soft_reset, 7075 .pre_soft_reset = gfx_v8_0_pre_soft_reset, 7076 .soft_reset = gfx_v8_0_soft_reset, 7077 .post_soft_reset = gfx_v8_0_post_soft_reset, 7078 .set_clockgating_state = gfx_v8_0_set_clockgating_state, 7079 .set_powergating_state = gfx_v8_0_set_powergating_state, 7080 .get_clockgating_state = gfx_v8_0_get_clockgating_state, 7081 }; 7082 7083 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 7084 .type = AMDGPU_RING_TYPE_GFX, 7085 .align_mask = 0xff, 7086 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7087 .support_64bit_ptrs = false, 7088 .get_rptr = gfx_v8_0_ring_get_rptr, 7089 .get_wptr = gfx_v8_0_ring_get_wptr_gfx, 7090 .set_wptr = gfx_v8_0_ring_set_wptr_gfx, 7091 .emit_frame_size = /* maximum 215dw if count 16 IBs in */ 7092 5 + /* COND_EXEC */ 7093 7 + /* PIPELINE_SYNC */ 7094 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */ 7095 8 + /* FENCE for VM_FLUSH */ 7096 20 + /* GDS switch */ 7097 4 + /* double SWITCH_BUFFER, 7098 the first COND_EXEC jump to the place just 7099 prior to this double SWITCH_BUFFER */ 7100 5 + /* COND_EXEC */ 7101 7 + /* HDP_flush */ 7102 4 + /* VGT_flush */ 7103 14 + /* CE_META */ 7104 31 + /* DE_META */ 7105 3 + /* CNTX_CTRL */ 7106 5 + /* HDP_INVL */ 7107 8 + 8 + /* FENCE x2 */ 7108 2, /* SWITCH_BUFFER */ 7109 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ 7110 .emit_ib = gfx_v8_0_ring_emit_ib_gfx, 7111 .emit_fence = gfx_v8_0_ring_emit_fence_gfx, 7112 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 7113 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 7114 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 7115 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 7116 .test_ring = gfx_v8_0_ring_test_ring, 7117 .test_ib = gfx_v8_0_ring_test_ib, 7118 .insert_nop = amdgpu_ring_insert_nop, 7119 .pad_ib = amdgpu_ring_generic_pad_ib, 7120 .emit_switch_buffer = gfx_v8_ring_emit_sb, 7121 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, 7122 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, 7123 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, 7124 .emit_wreg = gfx_v8_0_ring_emit_wreg, 7125 .soft_recovery = gfx_v8_0_ring_soft_recovery, 7126 }; 7127 7128 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 7129 .type = AMDGPU_RING_TYPE_COMPUTE, 7130 .align_mask = 0xff, 7131 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7132 .support_64bit_ptrs = false, 7133 .get_rptr = gfx_v8_0_ring_get_rptr, 7134 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 7135 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 7136 .emit_frame_size = 7137 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7138 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 7139 5 + /* hdp_invalidate */ 7140 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 7141 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ 7142 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ 7143 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 7144 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 7145 .emit_fence = gfx_v8_0_ring_emit_fence_compute, 7146 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 7147 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 7148 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 7149 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 7150 .test_ring = gfx_v8_0_ring_test_ring, 7151 .test_ib = gfx_v8_0_ring_test_ib, 7152 .insert_nop = amdgpu_ring_insert_nop, 7153 .pad_ib = amdgpu_ring_generic_pad_ib, 7154 .set_priority = gfx_v8_0_ring_set_priority_compute, 7155 .emit_wreg = gfx_v8_0_ring_emit_wreg, 7156 }; 7157 7158 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { 7159 .type = AMDGPU_RING_TYPE_KIQ, 7160 .align_mask = 0xff, 7161 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7162 .support_64bit_ptrs = false, 7163 .get_rptr = gfx_v8_0_ring_get_rptr, 7164 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 7165 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 7166 .emit_frame_size = 7167 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7168 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 7169 5 + /* hdp_invalidate */ 7170 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 7171 17 + /* gfx_v8_0_ring_emit_vm_flush */ 7172 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7173 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 7174 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 7175 .emit_fence = gfx_v8_0_ring_emit_fence_kiq, 7176 .test_ring = gfx_v8_0_ring_test_ring, 7177 .test_ib = gfx_v8_0_ring_test_ib, 7178 .insert_nop = amdgpu_ring_insert_nop, 7179 .pad_ib = amdgpu_ring_generic_pad_ib, 7180 .emit_rreg = gfx_v8_0_ring_emit_rreg, 7181 .emit_wreg = gfx_v8_0_ring_emit_wreg, 7182 }; 7183 7184 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) 7185 { 7186 int i; 7187 7188 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq; 7189 7190 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7191 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx; 7192 7193 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7194 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute; 7195 } 7196 7197 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = { 7198 .set = gfx_v8_0_set_eop_interrupt_state, 7199 .process = gfx_v8_0_eop_irq, 7200 }; 7201 7202 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = { 7203 .set = gfx_v8_0_set_priv_reg_fault_state, 7204 .process = gfx_v8_0_priv_reg_irq, 7205 }; 7206 7207 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = { 7208 .set = gfx_v8_0_set_priv_inst_fault_state, 7209 .process = gfx_v8_0_priv_inst_irq, 7210 }; 7211 7212 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = { 7213 .set = gfx_v8_0_kiq_set_interrupt_state, 7214 .process = gfx_v8_0_kiq_irq, 7215 }; 7216 7217 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = { 7218 .set = gfx_v8_0_set_cp_ecc_int_state, 7219 .process = gfx_v8_0_cp_ecc_error_irq, 7220 }; 7221 7222 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = { 7223 .set = gfx_v8_0_set_sq_int_state, 7224 .process = gfx_v8_0_sq_irq, 7225 }; 7226 7227 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) 7228 { 7229 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7230 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs; 7231 7232 adev->gfx.priv_reg_irq.num_types = 1; 7233 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs; 7234 7235 adev->gfx.priv_inst_irq.num_types = 1; 7236 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; 7237 7238 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; 7239 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs; 7240 7241 adev->gfx.cp_ecc_error_irq.num_types = 1; 7242 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs; 7243 7244 adev->gfx.sq_irq.num_types = 1; 7245 adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs; 7246 } 7247 7248 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) 7249 { 7250 adev->gfx.rlc.funcs = &iceland_rlc_funcs; 7251 } 7252 7253 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) 7254 { 7255 /* init asci gds info */ 7256 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); 7257 adev->gds.gws.total_size = 64; 7258 adev->gds.oa.total_size = 16; 7259 7260 if (adev->gds.mem.total_size == 64 * 1024) { 7261 adev->gds.mem.gfx_partition_size = 4096; 7262 adev->gds.mem.cs_partition_size = 4096; 7263 7264 adev->gds.gws.gfx_partition_size = 4; 7265 adev->gds.gws.cs_partition_size = 4; 7266 7267 adev->gds.oa.gfx_partition_size = 4; 7268 adev->gds.oa.cs_partition_size = 1; 7269 } else { 7270 adev->gds.mem.gfx_partition_size = 1024; 7271 adev->gds.mem.cs_partition_size = 1024; 7272 7273 adev->gds.gws.gfx_partition_size = 16; 7274 adev->gds.gws.cs_partition_size = 16; 7275 7276 adev->gds.oa.gfx_partition_size = 4; 7277 adev->gds.oa.cs_partition_size = 4; 7278 } 7279 } 7280 7281 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 7282 u32 bitmap) 7283 { 7284 u32 data; 7285 7286 if (!bitmap) 7287 return; 7288 7289 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7290 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7291 7292 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data); 7293 } 7294 7295 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) 7296 { 7297 u32 data, mask; 7298 7299 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | 7300 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); 7301 7302 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); 7303 7304 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; 7305 } 7306 7307 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) 7308 { 7309 int i, j, k, counter, active_cu_number = 0; 7310 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 7311 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 7312 unsigned disable_masks[4 * 2]; 7313 u32 ao_cu_num; 7314 7315 memset(cu_info, 0, sizeof(*cu_info)); 7316 7317 if (adev->flags & AMD_IS_APU) 7318 ao_cu_num = 2; 7319 else 7320 ao_cu_num = adev->gfx.config.max_cu_per_sh; 7321 7322 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); 7323 7324 mutex_lock(&adev->grbm_idx_mutex); 7325 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7326 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7327 mask = 1; 7328 ao_bitmap = 0; 7329 counter = 0; 7330 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 7331 if (i < 4 && j < 2) 7332 gfx_v8_0_set_user_cu_inactive_bitmap( 7333 adev, disable_masks[i * 2 + j]); 7334 bitmap = gfx_v8_0_get_cu_active_bitmap(adev); 7335 cu_info->bitmap[i][j] = bitmap; 7336 7337 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 7338 if (bitmap & mask) { 7339 if (counter < ao_cu_num) 7340 ao_bitmap |= mask; 7341 counter ++; 7342 } 7343 mask <<= 1; 7344 } 7345 active_cu_number += counter; 7346 if (i < 2 && j < 2) 7347 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 7348 cu_info->ao_cu_bitmap[i][j] = ao_bitmap; 7349 } 7350 } 7351 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 7352 mutex_unlock(&adev->grbm_idx_mutex); 7353 7354 cu_info->number = active_cu_number; 7355 cu_info->ao_cu_mask = ao_cu_mask; 7356 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 7357 cu_info->max_waves_per_simd = 10; 7358 cu_info->max_scratch_slots_per_cu = 32; 7359 cu_info->wave_front_size = 64; 7360 cu_info->lds_size = 64; 7361 } 7362 7363 const struct amdgpu_ip_block_version gfx_v8_0_ip_block = 7364 { 7365 .type = AMD_IP_BLOCK_TYPE_GFX, 7366 .major = 8, 7367 .minor = 0, 7368 .rev = 0, 7369 .funcs = &gfx_v8_0_ip_funcs, 7370 }; 7371 7372 const struct amdgpu_ip_block_version gfx_v8_1_ip_block = 7373 { 7374 .type = AMD_IP_BLOCK_TYPE_GFX, 7375 .major = 8, 7376 .minor = 1, 7377 .rev = 0, 7378 .funcs = &gfx_v8_0_ip_funcs, 7379 }; 7380 7381 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 7382 { 7383 uint64_t ce_payload_addr; 7384 int cnt_ce; 7385 union { 7386 struct vi_ce_ib_state regular; 7387 struct vi_ce_ib_state_chained_ib chained; 7388 } ce_payload = {}; 7389 7390 if (ring->adev->virt.chained_ib_support) { 7391 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) + 7392 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); 7393 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; 7394 } else { 7395 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) + 7396 offsetof(struct vi_gfx_meta_data, ce_payload); 7397 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; 7398 } 7399 7400 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce)); 7401 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 7402 WRITE_DATA_DST_SEL(8) | 7403 WR_CONFIRM) | 7404 WRITE_DATA_CACHE_POLICY(0)); 7405 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr)); 7406 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr)); 7407 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2); 7408 } 7409 7410 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring) 7411 { 7412 uint64_t de_payload_addr, gds_addr, csa_addr; 7413 int cnt_de; 7414 union { 7415 struct vi_de_ib_state regular; 7416 struct vi_de_ib_state_chained_ib chained; 7417 } de_payload = {}; 7418 7419 csa_addr = amdgpu_csa_vaddr(ring->adev); 7420 gds_addr = csa_addr + 4096; 7421 if (ring->adev->virt.chained_ib_support) { 7422 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); 7423 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr); 7424 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload); 7425 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2; 7426 } else { 7427 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr); 7428 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr); 7429 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload); 7430 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2; 7431 } 7432 7433 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de)); 7434 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 7435 WRITE_DATA_DST_SEL(8) | 7436 WR_CONFIRM) | 7437 WRITE_DATA_CACHE_POLICY(0)); 7438 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr)); 7439 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); 7440 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2); 7441 } 7442