/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)			((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)			((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)			((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)		((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)			((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)			((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)			((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)		((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)			((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14
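/*
 * Binary firmware images fetched at init time.  Listing them with
 * MODULE_FIRMWARE() records the dependency in the module metadata so
 * userspace tooling (e.g. initramfs generators) can bundle the files.
 */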
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
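/*
 * The "golden" register tables below are flat arrays of
 * {offset, and_mask, or_value} triplets consumed by
 * amdgpu_program_register_sequence().  For each triplet that helper
 * effectively does (with and_mask == 0xffffffff meaning a direct write):
 *
 *	tmp = RREG32(offset);
 *	tmp &= ~and_mask;
 *	tmp |= or_value;
 *	WREG32(offset, tmp);
 */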
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
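/*
 * Apply the per-ASIC "golden" register tables above.  Called once during
 * init.  Polaris10 additionally programs ixCG_ACLK_CNTL through the SMC
 * and, on a handful of specific partner boards (matched by PCI revision
 * and subsystem IDs), issues two extra AtomBIOS I2C transactions.
 */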
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
		adev->gfx.scratch.free[i] = true;
		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
	}
}
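/*
 * Basic ring smoke test: seed a scratch register with 0xCAFEDEAD, ask the
 * CP to overwrite it with 0xDEADBEEF via a SET_UCONFIG_REG packet, then
 * poll the register until the value lands or the usec timeout expires.
 * The IB test below exercises the same mechanism through an indirect
 * buffer instead of direct ring writes.
 */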
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}


static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
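/*
 * Fetch and validate the CP (PFP/ME/CE/MEC, plus MEC2 where it exists)
 * and RLC microcode from amdgpu/<chip>_*.bin, cache the version/feature
 * numbers parsed from the firmware headers, and record the RLC register
 * list layout.  When the SMU loads the firmware, the ucode table entries
 * and total firmware size are filled in as well.
 */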
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	/* one allocation holds the format list followed by the restore list */
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			/* MEC2 firmware is optional */
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.smu_load) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the CP jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}

	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}
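/*
 * Build the clear-state buffer (CSB): a PM4 stream that opens the
 * CLEAR_STATE preamble, replays the SECT_CONTEXT register extents from
 * the vi clear-state tables, programs the raster config for RB 0/0,
 * closes the preamble, and ends with a CLEAR_STATE packet.
 */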
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
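/*
 * Pack the CP jump tables from each microcode image (CE, PFP, ME, MEC,
 * plus MEC2 on Carrizo) back to back into the RLC cp_table buffer; each
 * firmware header supplies its table's offset and size in dwords.
 */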
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}
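/*
 * Compute micro-engine (MEC) buffers: the compute queues are backed by
 * HPD EOP regions carved out of a single GTT buffer object, allocated in
 * mec_init below and torn down in mec_fini.
 */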
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}

#define MEC_HPD_SIZE 2048

static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
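/*
 * The two arrays below are hand-assembled GCN ISA dwords used by the EDC
 * GPR workaround further down: the first shader writes every VGPR, the
 * second every SGPR.  The trailing 0xbf8a0000/0xbf810000 dwords appear
 * to be s_barrier and s_endpgm.
 */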
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
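/*
 * Carrizo EDC workaround: temporarily disable GB_EDC_MODE, build one IB
 * that runs three compute dispatches (one VGPR pass plus two SGPR passes
 * steered to different CU halves via COMPUTE_STATIC_THREAD_MGMT_SE0) so
 * every GPR gets written, then re-enable EDC with DED_MODE/PROP_FED set
 * and read the SEC/DED counters back to clear them.
 */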
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
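/*
 * Each table is a flat (register, value) list consumed two entries at a
 * time below.  COMPUTE_STATIC_THREAD_MGMT_SE0 picks the CUs a pass may
 * run on: all of them for the VGPR pass, then the 0x0f and 0xf0 halves
 * for the two SGPR passes so every CU gets scrubbed.
 * COMPUTE_PGM_RSRC2 = 20 decodes to USER_SGPR = 10, matching the ten
 * COMPUTE_USER_DATA_* registers preloaded into s0-s9.
 */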
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};

static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);
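	/*
	 * The arithmetic above lays the IB out as: packet stream first
	 * (3 dwords per SET_SH_REG register/value pair, 4 for the
	 * PGM_LO/HI write, 5 for DISPATCH_DIRECT, 2 for the EVENT_WRITE
	 * flush, times 4 bytes per dword), then the VGPR and SGPR shader
	 * blobs at 256-byte-aligned offsets so COMPUTE_PGM_LO can
	 * address them.
	 */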
	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
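	/*
	 * Notes on the packet encoding above (repeated for the two SGPR
	 * passes below): COMPUTE_PGM_LO/HI take the shader address in
	 * 256-byte units, hence the >> 8; the dispatch launches 8x1x1
	 * workgroups; and EVENT_TYPE(7)/EVENT_INDEX(4) is a
	 * CS_PARTIAL_FLUSH, which serializes the passes so that the
	 * per-pass CU masks take effect.
	 */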
	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}

static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM is installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If the DIMM addr map is 8GB, the ROW size should be 2KB, otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), use the larger ROW size. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}
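	/*
	 * Worked example for the discrete-GPU branch above: the row size
	 * is 4 * 2^(8 + NOOFCOLS) bytes, so NOOFCOLS = 0 gives
	 * 4 * 256 / 1024 = 1 KB rows and NOOFCOLS = 2 gives 4 KB, the
	 * clamp value, since GB_ADDR_CONFIG.ROW_SIZE below can only
	 * encode 1, 2 or 4 KB rows.
	 */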
	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}

static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     irq_type);
		if (r)
			return r;
	}
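	/*
	 * Ring-to-hardware mapping used above: compute ring i lands on
	 * MEC 1, pipe i / 8, queue i % 8, with doorbell
	 * AMDGPU_DOORBELL_MEC_RING0 + i.  With GFX8_NUM_COMPUTE_RINGS = 8,
	 * only pipe 0 is populated here, matching the single pipe
	 * reserved for amdgpu in gfx_v8_0_mec_init().
	 */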
	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}

static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	gfx_v8_0_free_microcode(adev);

	return 0;
}

static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] =
(ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2165 PIPE_CONFIG(ADDR_SURF_P2) | 2166 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2167 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2168 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2169 PIPE_CONFIG(ADDR_SURF_P2) | 2170 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2171 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2172 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2173 PIPE_CONFIG(ADDR_SURF_P2) | 2174 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2175 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2176 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2177 PIPE_CONFIG(ADDR_SURF_P2) | 2178 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2179 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2180 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2181 PIPE_CONFIG(ADDR_SURF_P2) | 2182 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2184 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2185 PIPE_CONFIG(ADDR_SURF_P2) | 2186 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2187 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2188 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2189 PIPE_CONFIG(ADDR_SURF_P2) | 2190 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2191 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2192 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2193 PIPE_CONFIG(ADDR_SURF_P2) | 2194 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2195 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2196 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2197 PIPE_CONFIG(ADDR_SURF_P2) | 2198 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2199 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2200 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2201 PIPE_CONFIG(ADDR_SURF_P2) | 2202 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2203 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2204 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2205 PIPE_CONFIG(ADDR_SURF_P2) | 2206 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2207 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2208 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2209 PIPE_CONFIG(ADDR_SURF_P2) | 2210 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2211 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2212 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2213 PIPE_CONFIG(ADDR_SURF_P2) | 2214 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2215 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2216 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2217 PIPE_CONFIG(ADDR_SURF_P2) | 2218 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2219 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2220 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2221 PIPE_CONFIG(ADDR_SURF_P2) | 2222 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2223 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2224 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2225 PIPE_CONFIG(ADDR_SURF_P2) | 2226 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2227 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2228 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2229 PIPE_CONFIG(ADDR_SURF_P2) | 2230 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2231 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2232 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2233 PIPE_CONFIG(ADDR_SURF_P2) | 2234 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2235 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2236 2237 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2238 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2239 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2240 NUM_BANKS(ADDR_SURF_8_BANK)); 2241 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2242 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2243 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2244 NUM_BANKS(ADDR_SURF_8_BANK)); 2245 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2246 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2247 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2248 NUM_BANKS(ADDR_SURF_8_BANK)); 2249 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2250 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2251 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2252 NUM_BANKS(ADDR_SURF_8_BANK)); 2253 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2254 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2255 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2256 NUM_BANKS(ADDR_SURF_8_BANK)); 2257 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2258 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2259 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2260 NUM_BANKS(ADDR_SURF_8_BANK)); 2261 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2262 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2263 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2264 NUM_BANKS(ADDR_SURF_8_BANK)); 2265 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2266 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2267 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2268 NUM_BANKS(ADDR_SURF_16_BANK)); 2269 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2270 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2271 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2272 NUM_BANKS(ADDR_SURF_16_BANK)); 2273 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2274 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2275 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2276 NUM_BANKS(ADDR_SURF_16_BANK)); 2277 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2278 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2279 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2280 NUM_BANKS(ADDR_SURF_16_BANK)); 2281 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2282 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2283 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2284 NUM_BANKS(ADDR_SURF_16_BANK)); 2285 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2286 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2287 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2288 NUM_BANKS(ADDR_SURF_16_BANK)); 2289 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2290 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2291 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2292 NUM_BANKS(ADDR_SURF_8_BANK)); 2293 2294 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2295 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2296 reg_offset != 23) 2297 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2298 2299 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2300 if (reg_offset != 7) 2301 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2302 2303 break; 2304 case CHIP_FIJI: 2305 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2306 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2307 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2308 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2309 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2310 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2311 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2312 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2313 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2314 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 
2315 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2316 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2317 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2318 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2319 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2320 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2321 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2322 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2323 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2324 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2325 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2326 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2327 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2328 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2329 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2330 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2331 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2332 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2333 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2334 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2335 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2336 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2337 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2338 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2339 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2340 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2341 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2342 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2343 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2344 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2345 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2347 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2348 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2349 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2350 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2351 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2352 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2353 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2354 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2355 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2356 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2357 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2359 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2360 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2361 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2363 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2364 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2365 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2367 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2368 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2369 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2370 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2371 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2372 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2373 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2374 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2375 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2376 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2377 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2379 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2380 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2381 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2382 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2383 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2384 
PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2385 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2386 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2387 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2388 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2389 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2390 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2391 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2392 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2393 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2394 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2395 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2396 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2397 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2398 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2399 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2400 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2401 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2402 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2403 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2404 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2405 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2406 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2407 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2408 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2409 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2410 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2411 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2412 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2413 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2415 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2416 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2417 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2418 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2419 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2420 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2421 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2423 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2424 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2425 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2427 2428 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2429 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2430 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2431 NUM_BANKS(ADDR_SURF_8_BANK)); 2432 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2433 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2434 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2435 NUM_BANKS(ADDR_SURF_8_BANK)); 2436 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2437 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2438 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2439 NUM_BANKS(ADDR_SURF_8_BANK)); 2440 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2443 NUM_BANKS(ADDR_SURF_8_BANK)); 2444 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2445 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2446 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2447 NUM_BANKS(ADDR_SURF_8_BANK)); 2448 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2449 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2450 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2451 NUM_BANKS(ADDR_SURF_8_BANK)); 2452 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2455 NUM_BANKS(ADDR_SURF_8_BANK)); 2456 mod2array[8] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2459 NUM_BANKS(ADDR_SURF_8_BANK)); 2460 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2463 NUM_BANKS(ADDR_SURF_8_BANK)); 2464 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2467 NUM_BANKS(ADDR_SURF_8_BANK)); 2468 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2469 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2470 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2471 NUM_BANKS(ADDR_SURF_8_BANK)); 2472 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2473 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2474 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2475 NUM_BANKS(ADDR_SURF_8_BANK)); 2476 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2479 NUM_BANKS(ADDR_SURF_8_BANK)); 2480 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2481 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2482 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2483 NUM_BANKS(ADDR_SURF_4_BANK)); 2484 2485 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2486 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2487 2488 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2489 if (reg_offset != 7) 2490 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2491 2492 break; 2493 case CHIP_TONGA: 2494 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2495 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2496 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2497 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2498 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2499 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2500 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2501 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2502 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2503 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2504 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2505 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2506 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2507 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2508 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2509 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2510 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2511 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2512 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2513 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2514 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2515 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2516 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2517 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2518 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2519 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2520 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2521 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2522 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2523 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2524 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2525 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2526 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2527 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2528 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2529 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2530 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2531 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2532 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2533 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2534 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2535 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2536 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2537 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2538 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2539 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2540 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2541 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2542 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2543 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2544 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2545 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2546 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2548 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2549 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2550 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2552 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2553 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2554 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2555 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2556 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2557 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2558 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2559 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2560 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2561 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2562 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2563 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2564 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2565 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2566 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2567 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2568 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2569 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2570 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2571 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2572 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2573 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2574 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2575 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2576 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2577 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2578 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2579 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2580 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2581 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2582 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2583 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2584 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2585 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2586 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2587 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2588 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2589 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2590 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2591 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2592 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2593 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2594 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2595 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2596 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2597 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2598 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2599 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2600 modearray[27] = 
(ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2601 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2602 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2603 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2604 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2605 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2606 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2607 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2608 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2609 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2610 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2611 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2612 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2613 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2614 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2615 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2616 2617 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2618 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2619 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2620 NUM_BANKS(ADDR_SURF_16_BANK)); 2621 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2622 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2623 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2624 NUM_BANKS(ADDR_SURF_16_BANK)); 2625 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2626 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2627 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2628 NUM_BANKS(ADDR_SURF_16_BANK)); 2629 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2630 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2631 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2632 NUM_BANKS(ADDR_SURF_16_BANK)); 2633 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2634 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2635 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2636 NUM_BANKS(ADDR_SURF_16_BANK)); 2637 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2638 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2639 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2640 NUM_BANKS(ADDR_SURF_16_BANK)); 2641 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2642 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2643 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2644 NUM_BANKS(ADDR_SURF_16_BANK)); 2645 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2646 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2647 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2648 NUM_BANKS(ADDR_SURF_16_BANK)); 2649 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2650 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2651 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2652 NUM_BANKS(ADDR_SURF_16_BANK)); 2653 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2654 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2655 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2656 NUM_BANKS(ADDR_SURF_16_BANK)); 2657 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2658 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2659 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2660 NUM_BANKS(ADDR_SURF_16_BANK)); 2661 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2662 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2663 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2664 NUM_BANKS(ADDR_SURF_8_BANK)); 2665 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2666 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2667 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2668 NUM_BANKS(ADDR_SURF_4_BANK)); 2669 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2670 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2671 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2672 NUM_BANKS(ADDR_SURF_4_BANK)); 2673 2674 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2675 
WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2676 2677 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2678 if (reg_offset != 7) 2679 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2680 2681 break; 2682 case CHIP_POLARIS11: 2683 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2684 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2685 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2686 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2687 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2688 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2689 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2690 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2691 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2692 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2693 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2694 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2695 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2696 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2697 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2698 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2699 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2700 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2701 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2702 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2703 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2704 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2705 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2706 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2707 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2708 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2709 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2710 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2711 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2712 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2713 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2714 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2715 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2716 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2717 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2718 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2719 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2720 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2721 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2722 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2723 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2725 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2726 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2727 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2728 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2729 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2730 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2731 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2732 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2733 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2734 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2735 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2736 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2737 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2738 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2739 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2740 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2741 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2742 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2743 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2744 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2745 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2746 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2747 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2748 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2749 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2750 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2751 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2752 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2753 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2754 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2755 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2756 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2757 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2758 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2759 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2760 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2761 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2762 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2763 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2764 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2765 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2766 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2767 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2768 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2769 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2770 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2771 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2772 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2773 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2774 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2775 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2776 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2777 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2778 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2779 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2780 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2781 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2782 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2783 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2784 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2785 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2786 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2787 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2788 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2789 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2790 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2791 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2792 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2793 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2794 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2795 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2796 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2797 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2798 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2799 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2800 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2801 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2802 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2803 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2804 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2805 2806 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2807 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2808 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2809 NUM_BANKS(ADDR_SURF_16_BANK)); 2810 2811 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2812 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2813 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2814 NUM_BANKS(ADDR_SURF_16_BANK)); 2815 2816 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2817 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2818 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2819 NUM_BANKS(ADDR_SURF_16_BANK)); 2820 2821 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2822 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2823 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2824 NUM_BANKS(ADDR_SURF_16_BANK)); 2825 2826 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2827 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2828 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2829 NUM_BANKS(ADDR_SURF_16_BANK)); 2830 2831 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2832 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2833 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2834 NUM_BANKS(ADDR_SURF_16_BANK)); 2835 2836 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2837 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2838 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2839 NUM_BANKS(ADDR_SURF_16_BANK)); 2840 2841 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2842 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2843 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2844 NUM_BANKS(ADDR_SURF_16_BANK)); 2845 2846 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2847 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2848 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2849 NUM_BANKS(ADDR_SURF_16_BANK)); 2850 2851 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2852 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2853 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2854 NUM_BANKS(ADDR_SURF_16_BANK)); 2855 2856 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2857 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2858 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2859 NUM_BANKS(ADDR_SURF_16_BANK)); 2860 2861 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2862 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2863 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2864 NUM_BANKS(ADDR_SURF_16_BANK)); 2865 2866 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2867 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2868 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2869 NUM_BANKS(ADDR_SURF_8_BANK)); 2870 2871 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2872 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2873 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2874 NUM_BANKS(ADDR_SURF_4_BANK)); 2875 2876 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2877 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2878 2879 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2880 if (reg_offset != 7) 2881 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2882 2883 break; 2884 case CHIP_POLARIS10: 2885 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2886 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2887 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2888 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2889 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2890 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2891 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2892 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2893 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2894 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2895 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2896 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2897 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2898 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2899 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2900 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2901 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2902 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2903 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2904 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2905 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2906 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2907 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2908 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2909 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2910 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2911 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2912 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2913 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2914 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2915 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2916 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2917 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2918 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2919 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2920 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2921 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2922 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2923 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2924 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2925 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2926 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2927 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2928 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2929 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2930 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2931 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2932 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2933 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2934 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2935 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2936 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2937 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2938 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2939 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2940 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2941 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2942 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2943 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2944 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2945 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2946 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2947 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2948 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2949 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2950 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2951 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2952 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2953 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2954 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2955 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2956 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2957 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2958 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2959 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2960 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2961 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2962 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2963 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2964 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2965 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2966 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2967 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2968 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2969 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2970 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2971 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2972 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2973 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2974 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2975 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2976 
PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2977 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2978 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2979 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2980 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2981 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2982 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2983 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2984 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2985 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2986 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2987 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2988 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2989 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2990 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2991 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2992 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2993 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2994 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2995 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2996 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2997 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2998 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2999 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3000 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3001 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3002 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3003 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3004 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3005 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3006 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3007 3008 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3009 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3010 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3011 NUM_BANKS(ADDR_SURF_16_BANK)); 3012 3013 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3014 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3015 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3016 NUM_BANKS(ADDR_SURF_16_BANK)); 3017 3018 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3019 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3020 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3021 NUM_BANKS(ADDR_SURF_16_BANK)); 3022 3023 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3024 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3025 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3026 NUM_BANKS(ADDR_SURF_16_BANK)); 3027 3028 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3029 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3030 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3031 NUM_BANKS(ADDR_SURF_16_BANK)); 3032 3033 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3034 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3035 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3036 NUM_BANKS(ADDR_SURF_16_BANK)); 3037 3038 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3039 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3040 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3041 NUM_BANKS(ADDR_SURF_16_BANK)); 3042 3043 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3044 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3045 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3046 NUM_BANKS(ADDR_SURF_16_BANK)); 3047 3048 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3049 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3050 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3051 NUM_BANKS(ADDR_SURF_16_BANK)); 3052 3053 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3054 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3055 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3056 NUM_BANKS(ADDR_SURF_16_BANK)); 3057 3058 mod2array[11] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3059 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3060 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3061 NUM_BANKS(ADDR_SURF_16_BANK)); 3062 3063 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3064 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3065 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3066 NUM_BANKS(ADDR_SURF_8_BANK)); 3067 3068 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3069 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3070 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3071 NUM_BANKS(ADDR_SURF_4_BANK)); 3072 3073 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3074 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3075 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3076 NUM_BANKS(ADDR_SURF_4_BANK)); 3077 3078 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3079 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3080 3081 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3082 if (reg_offset != 7) 3083 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3084 3085 break; 3086 case CHIP_STONEY: 3087 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3088 PIPE_CONFIG(ADDR_SURF_P2) | 3089 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3090 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3091 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3092 PIPE_CONFIG(ADDR_SURF_P2) | 3093 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3094 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3095 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3096 PIPE_CONFIG(ADDR_SURF_P2) | 3097 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3098 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3099 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3100 PIPE_CONFIG(ADDR_SURF_P2) | 3101 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3102 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3103 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3104 PIPE_CONFIG(ADDR_SURF_P2) | 3105 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3106 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3107 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3108 PIPE_CONFIG(ADDR_SURF_P2) | 3109 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3110 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3111 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3112 PIPE_CONFIG(ADDR_SURF_P2) | 3113 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3114 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3115 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3116 PIPE_CONFIG(ADDR_SURF_P2)); 3117 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3118 PIPE_CONFIG(ADDR_SURF_P2) | 3119 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3120 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3121 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3122 PIPE_CONFIG(ADDR_SURF_P2) | 3123 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3124 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3125 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3126 PIPE_CONFIG(ADDR_SURF_P2) | 3127 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3128 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3129 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3130 PIPE_CONFIG(ADDR_SURF_P2) | 3131 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3132 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3133 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3134 PIPE_CONFIG(ADDR_SURF_P2) | 3135 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3136 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3137 modearray[15] = 
(ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3138 PIPE_CONFIG(ADDR_SURF_P2) | 3139 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3141 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3142 PIPE_CONFIG(ADDR_SURF_P2) | 3143 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3144 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3145 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3146 PIPE_CONFIG(ADDR_SURF_P2) | 3147 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3148 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3149 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3150 PIPE_CONFIG(ADDR_SURF_P2) | 3151 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3152 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3153 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3154 PIPE_CONFIG(ADDR_SURF_P2) | 3155 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3156 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3157 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3158 PIPE_CONFIG(ADDR_SURF_P2) | 3159 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3160 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3161 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3162 PIPE_CONFIG(ADDR_SURF_P2) | 3163 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3164 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3165 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3166 PIPE_CONFIG(ADDR_SURF_P2) | 3167 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3168 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3169 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3170 PIPE_CONFIG(ADDR_SURF_P2) | 3171 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3172 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3173 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3174 PIPE_CONFIG(ADDR_SURF_P2) | 3175 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3176 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3177 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3178 PIPE_CONFIG(ADDR_SURF_P2) | 3179 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3180 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3181 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3182 PIPE_CONFIG(ADDR_SURF_P2) | 3183 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3184 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3185 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3186 PIPE_CONFIG(ADDR_SURF_P2) | 3187 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3188 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3189 3190 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3191 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3192 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3193 NUM_BANKS(ADDR_SURF_8_BANK)); 3194 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3197 NUM_BANKS(ADDR_SURF_8_BANK)); 3198 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3199 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3200 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3201 NUM_BANKS(ADDR_SURF_8_BANK)); 3202 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3203 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3204 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3205 NUM_BANKS(ADDR_SURF_8_BANK)); 3206 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3209 NUM_BANKS(ADDR_SURF_8_BANK)); 3210 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3211 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3212 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3213 NUM_BANKS(ADDR_SURF_8_BANK)); 3214 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3215 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3216 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3217 NUM_BANKS(ADDR_SURF_8_BANK)); 3218 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3219 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3220 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3221 NUM_BANKS(ADDR_SURF_16_BANK)); 3222 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3223 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3224 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3225 NUM_BANKS(ADDR_SURF_16_BANK)); 3226 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3227 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3228 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3229 NUM_BANKS(ADDR_SURF_16_BANK)); 3230 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3231 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3232 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3233 NUM_BANKS(ADDR_SURF_16_BANK)); 3234 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3235 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3236 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3237 NUM_BANKS(ADDR_SURF_16_BANK)); 3238 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3239 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3240 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3241 NUM_BANKS(ADDR_SURF_16_BANK)); 3242 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3245 NUM_BANKS(ADDR_SURF_8_BANK)); 3246 3247 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3248 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3249 reg_offset != 23) 3250 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3251 3252 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3253 if (reg_offset != 7) 3254 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3255 3256 break; 3257 default: 3258 dev_warn(adev->dev, 3259 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3260 adev->asic_type); 3261 3262 case CHIP_CARRIZO: 3263 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3264 PIPE_CONFIG(ADDR_SURF_P2) | 3265 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3266 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3267 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3268 PIPE_CONFIG(ADDR_SURF_P2) | 3269 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3270 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3271 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3272 PIPE_CONFIG(ADDR_SURF_P2) | 3273 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3274 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3275 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3276 PIPE_CONFIG(ADDR_SURF_P2) | 3277 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3278 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3279 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3280 PIPE_CONFIG(ADDR_SURF_P2) | 3281 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3282 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3283 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3284 PIPE_CONFIG(ADDR_SURF_P2) | 3285 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3286 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3287 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3288 PIPE_CONFIG(ADDR_SURF_P2) | 3289 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3290 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3291 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3292 PIPE_CONFIG(ADDR_SURF_P2)); 3293 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3294 PIPE_CONFIG(ADDR_SURF_P2) | 3295 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3296 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3297 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3298 PIPE_CONFIG(ADDR_SURF_P2) | 3299 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3301 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3302 PIPE_CONFIG(ADDR_SURF_P2) | 3303 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3304 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3305 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3306 PIPE_CONFIG(ADDR_SURF_P2) | 3307 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3308 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3309 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3310 PIPE_CONFIG(ADDR_SURF_P2) | 3311 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3313 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3314 PIPE_CONFIG(ADDR_SURF_P2) | 3315 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3317 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3318 PIPE_CONFIG(ADDR_SURF_P2) | 3319 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3321 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3322 PIPE_CONFIG(ADDR_SURF_P2) | 3323 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3325 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3326 PIPE_CONFIG(ADDR_SURF_P2) | 3327 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3329 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3330 PIPE_CONFIG(ADDR_SURF_P2) | 3331 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3333 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3334 PIPE_CONFIG(ADDR_SURF_P2) | 3335 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3336 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3337 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3338 PIPE_CONFIG(ADDR_SURF_P2) | 3339 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3341 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3342 PIPE_CONFIG(ADDR_SURF_P2) | 3343 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3345 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3346 PIPE_CONFIG(ADDR_SURF_P2) | 3347 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3349 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3350 PIPE_CONFIG(ADDR_SURF_P2) | 3351 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3353 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3354 PIPE_CONFIG(ADDR_SURF_P2) | 3355 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3357 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3358 PIPE_CONFIG(ADDR_SURF_P2) | 3359 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3361 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3362 PIPE_CONFIG(ADDR_SURF_P2) | 3363 
MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3365 3366 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3367 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3368 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3369 NUM_BANKS(ADDR_SURF_8_BANK)); 3370 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3371 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3372 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3373 NUM_BANKS(ADDR_SURF_8_BANK)); 3374 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3375 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3376 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3377 NUM_BANKS(ADDR_SURF_8_BANK)); 3378 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3379 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3380 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3381 NUM_BANKS(ADDR_SURF_8_BANK)); 3382 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3383 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3384 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3385 NUM_BANKS(ADDR_SURF_8_BANK)); 3386 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3387 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3388 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3389 NUM_BANKS(ADDR_SURF_8_BANK)); 3390 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3391 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3392 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3393 NUM_BANKS(ADDR_SURF_8_BANK)); 3394 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3395 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3396 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3397 NUM_BANKS(ADDR_SURF_16_BANK)); 3398 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3399 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3400 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3401 NUM_BANKS(ADDR_SURF_16_BANK)); 3402 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3403 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3404 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3405 NUM_BANKS(ADDR_SURF_16_BANK)); 3406 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3407 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3408 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3409 NUM_BANKS(ADDR_SURF_16_BANK)); 3410 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3413 NUM_BANKS(ADDR_SURF_16_BANK)); 3414 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3417 NUM_BANKS(ADDR_SURF_16_BANK)); 3418 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3419 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3420 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3421 NUM_BANKS(ADDR_SURF_8_BANK)); 3422 3423 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3424 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3425 reg_offset != 23) 3426 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3427 3428 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3429 if (reg_offset != 7) 3430 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3431 3432 break; 3433 } 3434 } 3435 3436 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, 3437 u32 se_num, u32 sh_num, u32 instance) 3438 { 3439 u32 data; 3440 3441 if (instance == 0xffffffff) 3442 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3443 else 3444 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 3445 3446 if 
(se_num == 0xffffffff) 3447 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3448 else 3449 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3450 3451 if (sh_num == 0xffffffff) 3452 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3453 else 3454 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3455 3456 WREG32(mmGRBM_GFX_INDEX, data); 3457 } 3458 3459 static u32 gfx_v8_0_create_bitmask(u32 bit_width) 3460 { 3461 return (u32)((1ULL << bit_width) - 1); 3462 } 3463 3464 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3465 { 3466 u32 data, mask; 3467 3468 data = RREG32(mmCC_RB_BACKEND_DISABLE) | 3469 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3470 3471 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); 3472 3473 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / 3474 adev->gfx.config.max_sh_per_se); 3475 3476 return (~data) & mask; 3477 } 3478 3479 static void 3480 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) 3481 { 3482 switch (adev->asic_type) { 3483 case CHIP_FIJI: 3484 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3485 RB_XSEL2(1) | PKR_MAP(2) | 3486 PKR_XSEL(1) | PKR_YSEL(1) | 3487 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); 3488 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | 3489 SE_PAIR_YSEL(2); 3490 break; 3491 case CHIP_TONGA: 3492 case CHIP_POLARIS10: 3493 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3494 SE_XSEL(1) | SE_YSEL(1); 3495 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | 3496 SE_PAIR_YSEL(2); 3497 break; 3498 case CHIP_TOPAZ: 3499 case CHIP_CARRIZO: 3500 *rconf |= RB_MAP_PKR0(2); 3501 *rconf1 |= 0x0; 3502 break; 3503 case CHIP_POLARIS11: 3504 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3505 SE_XSEL(1) | SE_YSEL(1); 3506 *rconf1 |= 0x0; 3507 break; 3508 case CHIP_STONEY: 3509 *rconf |= 0x0; 3510 *rconf1 |= 0x0; 3511 break; 3512 default: 3513 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 3514 break; 3515 } 3516 } 3517 3518 static void 3519 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, 3520 u32 raster_config, u32 raster_config_1, 3521 unsigned rb_mask, unsigned num_rb) 3522 { 3523 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); 3524 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); 3525 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); 3526 unsigned rb_per_se = num_rb / num_se; 3527 unsigned se_mask[4]; 3528 unsigned se; 3529 3530 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3531 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3532 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 3533 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3534 3535 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); 3536 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); 3537 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); 3538 3539 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3540 (!se_mask[2] && !se_mask[3]))) { 3541 raster_config_1 &= ~SE_PAIR_MAP_MASK; 3542 3543 if (!se_mask[0] && !se_mask[1]) { 3544 raster_config_1 |= 3545 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); 3546 } else { 3547 raster_config_1 |= 3548 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); 3549 } 3550 } 3551 3552 for (se = 0; se < num_se; se++) { 3553 unsigned raster_config_se = raster_config; 3554 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3555 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3556 int idx = (se / 2) * 2; 3557 3558 if ((num_se > 1) && 
(!se_mask[idx] || !se_mask[idx + 1])) { 3559 raster_config_se &= ~SE_MAP_MASK; 3560 3561 if (!se_mask[idx]) { 3562 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); 3563 } else { 3564 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); 3565 } 3566 } 3567 3568 pkr0_mask &= rb_mask; 3569 pkr1_mask &= rb_mask; 3570 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3571 raster_config_se &= ~PKR_MAP_MASK; 3572 3573 if (!pkr0_mask) { 3574 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); 3575 } else { 3576 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); 3577 } 3578 } 3579 3580 if (rb_per_se >= 2) { 3581 unsigned rb0_mask = 1 << (se * rb_per_se); 3582 unsigned rb1_mask = rb0_mask << 1; 3583 3584 rb0_mask &= rb_mask; 3585 rb1_mask &= rb_mask; 3586 if (!rb0_mask || !rb1_mask) { 3587 raster_config_se &= ~RB_MAP_PKR0_MASK; 3588 3589 if (!rb0_mask) { 3590 raster_config_se |= 3591 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); 3592 } else { 3593 raster_config_se |= 3594 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); 3595 } 3596 } 3597 3598 if (rb_per_se > 2) { 3599 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3600 rb1_mask = rb0_mask << 1; 3601 rb0_mask &= rb_mask; 3602 rb1_mask &= rb_mask; 3603 if (!rb0_mask || !rb1_mask) { 3604 raster_config_se &= ~RB_MAP_PKR1_MASK; 3605 3606 if (!rb0_mask) { 3607 raster_config_se |= 3608 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); 3609 } else { 3610 raster_config_se |= 3611 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0); 3612 } 3613 } 3614 } 3615 } 3616 3617 /* GRBM_GFX_INDEX has a different offset on VI */ 3618 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); 3619 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); 3620 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3621 } 3622 3623 /* GRBM_GFX_INDEX has a different offset on VI */ 3624 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3625 } 3626 3627 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) 3628 { 3629 int i, j; 3630 u32 data; 3631 u32 raster_config = 0, raster_config_1 = 0; 3632 u32 active_rbs = 0; 3633 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 3634 adev->gfx.config.max_sh_per_se; 3635 unsigned num_rb_pipes; 3636 3637 mutex_lock(&adev->grbm_idx_mutex); 3638 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3639 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3640 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3641 data = gfx_v8_0_get_rb_active_bitmap(adev); 3642 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 3643 rb_bitmap_width_per_sh); 3644 } 3645 } 3646 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3647 3648 adev->gfx.config.backend_enable_mask = active_rbs; 3649 adev->gfx.config.num_rbs = hweight32(active_rbs); 3650 3651 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se * 3652 adev->gfx.config.max_shader_engines, 16); 3653 3654 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1); 3655 3656 if (!adev->gfx.config.backend_enable_mask || 3657 adev->gfx.config.num_rbs >= num_rb_pipes) { 3658 WREG32(mmPA_SC_RASTER_CONFIG, raster_config); 3659 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3660 } else { 3661 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1, 3662 adev->gfx.config.backend_enable_mask, 3663 num_rb_pipes); 3664 } 3665 3666 /* cache the values for userspace */ 3667 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3668 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3669 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3670 
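			/*
			 * Note (added for readability, my reading of the flow
			 * above): GRBM_GFX_INDEX was just pointed at a single
			 * SE/SH, so the four reads below return that
			 * instance's values; that is also why this whole loop
			 * runs under grbm_idx_mutex.
			 */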
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM_* registers of the compute VMIDs
 * (FIRST_COMPUTE_VMID .. LAST_COMPUTE_VMID - 1).
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
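
/*
 * A worked example of the aperture encoding above (my reading of the VI
 * register layout; treat it as illustrative rather than normative):
 * SH_MEM_BASES packs two 16-bit fields, one for the private aperture base
 * and one for the shared aperture base, and each field supplies bits 63:48
 * of the 64-bit virtual address.  With DEFAULT_SH_MEM_BASES = 0x6000
 * programmed into both halves:
 *
 *	sh_mem_bases  = 0x6000 | (0x6000 << 16);	// 0x60006000
 *	aperture base = (u64)0x6000 << 48;		// 0x60000000'00000000
 *
 * which matches the LDS/scratch window listed in the comment inside
 * gfx_v8_0_init_compute_vmid().
 */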

static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * make sure the following register writes are broadcast
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
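
/*
 * For readers unfamiliar with the amdgpu register helpers: the routine
 * below is a plain read-modify-write of CP_INT_CNTL_RING0.  A call such as
 *
 *	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 1);
 *
 * expands (roughly) to
 *
 *	tmp = (tmp & ~CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK) |
 *	      ((1 << CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE__SHIFT) &
 *	       CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK);
 *
 * i.e. clear the field, then OR in the new value shifted into place.
 */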

static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}

static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}

static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for a matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}

static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
	       adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size, in units of two dwords */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
			indirect_start_offsets[i]);

	/* unique
indices */ 3947 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; 3948 data = mmRLC_SRM_INDEX_CNTL_DATA_0; 3949 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { 3950 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false); 3951 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false); 3952 } 3953 kfree(register_list_format); 3954 3955 return 0; 3956 } 3957 3958 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev) 3959 { 3960 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1); 3961 } 3962 3963 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev) 3964 { 3965 uint32_t data; 3966 3967 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 3968 AMD_PG_SUPPORT_GFX_SMG | 3969 AMD_PG_SUPPORT_GFX_DMG)) { 3970 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60); 3971 3972 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10); 3973 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10); 3974 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10); 3975 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10); 3976 WREG32(mmRLC_PG_DELAY, data); 3977 3978 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3); 3979 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0); 3980 } 3981 } 3982 3983 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 3984 bool enable) 3985 { 3986 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0); 3987 } 3988 3989 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 3990 bool enable) 3991 { 3992 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0); 3993 } 3994 3995 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable) 3996 { 3997 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 
1 : 0);
}

static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG |
			      AMD_PG_SUPPORT_CP |
			      AMD_PG_SUPPORT_GDS |
			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);

		if ((adev->asic_type == CHIP_CARRIZO) ||
		    (adev->asic_type == CHIP_STONEY)) {
			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
			gfx_v8_0_init_power_gating(adev);
			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
				cz_enable_sck_slow_down_on_power_up(adev, true);
				cz_enable_sck_slow_down_on_power_down(adev, true);
			} else {
				cz_enable_sck_slow_down_on_power_up(adev, false);
				cz_enable_sck_slow_down_on_power_down(adev, false);
			}
			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
				cz_enable_cp_power_gating(adev, true);
			else
				cz_enable_cp_power_gating(adev, false);
		} else if (adev->asic_type == CHIP_POLARIS11) {
			gfx_v8_0_init_power_gating(adev);
		}
	}
}

static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* APUs (e.g. Carrizo) enable the CP interrupt only after the CP is initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}

static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading
*/ 4114 r = gfx_v8_0_rlc_load_microcode(adev); 4115 if (r) 4116 return r; 4117 } else { 4118 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 4119 AMDGPU_UCODE_ID_RLC_G); 4120 if (r) 4121 return -EINVAL; 4122 } 4123 } 4124 4125 gfx_v8_0_rlc_start(adev); 4126 4127 return 0; 4128 } 4129 4130 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 4131 { 4132 int i; 4133 u32 tmp = RREG32(mmCP_ME_CNTL); 4134 4135 if (enable) { 4136 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0); 4137 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0); 4138 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0); 4139 } else { 4140 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); 4141 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); 4142 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); 4143 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 4144 adev->gfx.gfx_ring[i].ready = false; 4145 } 4146 WREG32(mmCP_ME_CNTL, tmp); 4147 udelay(50); 4148 } 4149 4150 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 4151 { 4152 const struct gfx_firmware_header_v1_0 *pfp_hdr; 4153 const struct gfx_firmware_header_v1_0 *ce_hdr; 4154 const struct gfx_firmware_header_v1_0 *me_hdr; 4155 const __le32 *fw_data; 4156 unsigned i, fw_size; 4157 4158 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 4159 return -EINVAL; 4160 4161 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 4162 adev->gfx.pfp_fw->data; 4163 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 4164 adev->gfx.ce_fw->data; 4165 me_hdr = (const struct gfx_firmware_header_v1_0 *) 4166 adev->gfx.me_fw->data; 4167 4168 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 4169 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 4170 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 4171 4172 gfx_v8_0_cp_gfx_enable(adev, false); 4173 4174 /* PFP */ 4175 fw_data = (const __le32 *) 4176 (adev->gfx.pfp_fw->data + 4177 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 4178 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 4179 WREG32(mmCP_PFP_UCODE_ADDR, 0); 4180 for (i = 0; i < fw_size; i++) 4181 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 4182 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 4183 4184 /* CE */ 4185 fw_data = (const __le32 *) 4186 (adev->gfx.ce_fw->data + 4187 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 4188 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 4189 WREG32(mmCP_CE_UCODE_ADDR, 0); 4190 for (i = 0; i < fw_size; i++) 4191 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 4192 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 4193 4194 /* ME */ 4195 fw_data = (const __le32 *) 4196 (adev->gfx.me_fw->data + 4197 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 4198 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 4199 WREG32(mmCP_ME_RAM_WADDR, 0); 4200 for (i = 0; i < fw_size; i++) 4201 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 4202 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 4203 4204 return 0; 4205 } 4206 4207 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev) 4208 { 4209 u32 count = 0; 4210 const struct cs_section_def *sect = NULL; 4211 const struct cs_extent_def *ext = NULL; 4212 4213 /* begin clear state */ 4214 count += 2; 4215 /* context control state */ 4216 count += 3; 4217 4218 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4219 for (ext = sect->section; ext->extent != NULL; ++ext) { 4220 if (sect->id == SECT_CONTEXT) 4221 count += 2 + ext->reg_count; 4222 else 4223 return 0; 4224 } 4225 } 4226 /* 
pa_sc_raster_config/pa_sc_raster_config1 */ 4227 count += 4; 4228 /* end clear state */ 4229 count += 2; 4230 /* clear state */ 4231 count += 2; 4232 4233 return count; 4234 } 4235 4236 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) 4237 { 4238 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 4239 const struct cs_section_def *sect = NULL; 4240 const struct cs_extent_def *ext = NULL; 4241 int r, i; 4242 4243 /* init the CP */ 4244 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 4245 WREG32(mmCP_ENDIAN_SWAP, 0); 4246 WREG32(mmCP_DEVICE_ID, 1); 4247 4248 gfx_v8_0_cp_gfx_enable(adev, true); 4249 4250 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); 4251 if (r) { 4252 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 4253 return r; 4254 } 4255 4256 /* clear state buffer */ 4257 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4258 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 4259 4260 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4261 amdgpu_ring_write(ring, 0x80000000); 4262 amdgpu_ring_write(ring, 0x80000000); 4263 4264 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4265 for (ext = sect->section; ext->extent != NULL; ++ext) { 4266 if (sect->id == SECT_CONTEXT) { 4267 amdgpu_ring_write(ring, 4268 PACKET3(PACKET3_SET_CONTEXT_REG, 4269 ext->reg_count)); 4270 amdgpu_ring_write(ring, 4271 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 4272 for (i = 0; i < ext->reg_count; i++) 4273 amdgpu_ring_write(ring, ext->extent[i]); 4274 } 4275 } 4276 } 4277 4278 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 4279 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 4280 switch (adev->asic_type) { 4281 case CHIP_TONGA: 4282 case CHIP_POLARIS10: 4283 amdgpu_ring_write(ring, 0x16000012); 4284 amdgpu_ring_write(ring, 0x0000002A); 4285 break; 4286 case CHIP_POLARIS11: 4287 amdgpu_ring_write(ring, 0x16000012); 4288 amdgpu_ring_write(ring, 0x00000000); 4289 break; 4290 case CHIP_FIJI: 4291 amdgpu_ring_write(ring, 0x3a00161a); 4292 amdgpu_ring_write(ring, 0x0000002e); 4293 break; 4294 case CHIP_CARRIZO: 4295 amdgpu_ring_write(ring, 0x00000002); 4296 amdgpu_ring_write(ring, 0x00000000); 4297 break; 4298 case CHIP_TOPAZ: 4299 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ? 
				  0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}

static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}
	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
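
/*
 * A quick sanity check of the ring-size encoding used in
 * gfx_v8_0_cp_gfx_resume() above (my arithmetic, not from the register
 * spec): RB_BUFSZ appears to hold log2 of the ring size in 8-byte units,
 * so a 64 KiB ring gives
 *
 *	rb_bufsz = order_base_2(65536 / 8) = order_base_2(8192) = 13;
 *
 * and RB_BLKSZ = rb_bufsz - 2 keeps the CP's block granularity at a
 * quarter of the ring.
 */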
4411 4412 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 4413 { 4414 int i; 4415 4416 if (enable) { 4417 WREG32(mmCP_MEC_CNTL, 0); 4418 } else { 4419 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 4420 for (i = 0; i < adev->gfx.num_compute_rings; i++) 4421 adev->gfx.compute_ring[i].ready = false; 4422 } 4423 udelay(50); 4424 } 4425 4426 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 4427 { 4428 const struct gfx_firmware_header_v1_0 *mec_hdr; 4429 const __le32 *fw_data; 4430 unsigned i, fw_size; 4431 4432 if (!adev->gfx.mec_fw) 4433 return -EINVAL; 4434 4435 gfx_v8_0_cp_compute_enable(adev, false); 4436 4437 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4438 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4439 4440 fw_data = (const __le32 *) 4441 (adev->gfx.mec_fw->data + 4442 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4443 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4444 4445 /* MEC1 */ 4446 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4447 for (i = 0; i < fw_size; i++) 4448 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4449 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4450 4451 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 4452 if (adev->gfx.mec2_fw) { 4453 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4454 4455 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4456 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4457 4458 fw_data = (const __le32 *) 4459 (adev->gfx.mec2_fw->data + 4460 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4461 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4462 4463 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4464 for (i = 0; i < fw_size; i++) 4465 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4466 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4467 } 4468 4469 return 0; 4470 } 4471 4472 struct vi_mqd { 4473 uint32_t header; /* ordinal0 */ 4474 uint32_t compute_dispatch_initiator; /* ordinal1 */ 4475 uint32_t compute_dim_x; /* ordinal2 */ 4476 uint32_t compute_dim_y; /* ordinal3 */ 4477 uint32_t compute_dim_z; /* ordinal4 */ 4478 uint32_t compute_start_x; /* ordinal5 */ 4479 uint32_t compute_start_y; /* ordinal6 */ 4480 uint32_t compute_start_z; /* ordinal7 */ 4481 uint32_t compute_num_thread_x; /* ordinal8 */ 4482 uint32_t compute_num_thread_y; /* ordinal9 */ 4483 uint32_t compute_num_thread_z; /* ordinal10 */ 4484 uint32_t compute_pipelinestat_enable; /* ordinal11 */ 4485 uint32_t compute_perfcount_enable; /* ordinal12 */ 4486 uint32_t compute_pgm_lo; /* ordinal13 */ 4487 uint32_t compute_pgm_hi; /* ordinal14 */ 4488 uint32_t compute_tba_lo; /* ordinal15 */ 4489 uint32_t compute_tba_hi; /* ordinal16 */ 4490 uint32_t compute_tma_lo; /* ordinal17 */ 4491 uint32_t compute_tma_hi; /* ordinal18 */ 4492 uint32_t compute_pgm_rsrc1; /* ordinal19 */ 4493 uint32_t compute_pgm_rsrc2; /* ordinal20 */ 4494 uint32_t compute_vmid; /* ordinal21 */ 4495 uint32_t compute_resource_limits; /* ordinal22 */ 4496 uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */ 4497 uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */ 4498 uint32_t compute_tmpring_size; /* ordinal25 */ 4499 uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */ 4500 uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */ 4501 uint32_t compute_restart_x; /* ordinal28 */ 4502 uint32_t compute_restart_y; /* 
ordinal29 */ 4503 uint32_t compute_restart_z; /* ordinal30 */ 4504 uint32_t compute_thread_trace_enable; /* ordinal31 */ 4505 uint32_t compute_misc_reserved; /* ordinal32 */ 4506 uint32_t compute_dispatch_id; /* ordinal33 */ 4507 uint32_t compute_threadgroup_id; /* ordinal34 */ 4508 uint32_t compute_relaunch; /* ordinal35 */ 4509 uint32_t compute_wave_restore_addr_lo; /* ordinal36 */ 4510 uint32_t compute_wave_restore_addr_hi; /* ordinal37 */ 4511 uint32_t compute_wave_restore_control; /* ordinal38 */ 4512 uint32_t reserved9; /* ordinal39 */ 4513 uint32_t reserved10; /* ordinal40 */ 4514 uint32_t reserved11; /* ordinal41 */ 4515 uint32_t reserved12; /* ordinal42 */ 4516 uint32_t reserved13; /* ordinal43 */ 4517 uint32_t reserved14; /* ordinal44 */ 4518 uint32_t reserved15; /* ordinal45 */ 4519 uint32_t reserved16; /* ordinal46 */ 4520 uint32_t reserved17; /* ordinal47 */ 4521 uint32_t reserved18; /* ordinal48 */ 4522 uint32_t reserved19; /* ordinal49 */ 4523 uint32_t reserved20; /* ordinal50 */ 4524 uint32_t reserved21; /* ordinal51 */ 4525 uint32_t reserved22; /* ordinal52 */ 4526 uint32_t reserved23; /* ordinal53 */ 4527 uint32_t reserved24; /* ordinal54 */ 4528 uint32_t reserved25; /* ordinal55 */ 4529 uint32_t reserved26; /* ordinal56 */ 4530 uint32_t reserved27; /* ordinal57 */ 4531 uint32_t reserved28; /* ordinal58 */ 4532 uint32_t reserved29; /* ordinal59 */ 4533 uint32_t reserved30; /* ordinal60 */ 4534 uint32_t reserved31; /* ordinal61 */ 4535 uint32_t reserved32; /* ordinal62 */ 4536 uint32_t reserved33; /* ordinal63 */ 4537 uint32_t reserved34; /* ordinal64 */ 4538 uint32_t compute_user_data_0; /* ordinal65 */ 4539 uint32_t compute_user_data_1; /* ordinal66 */ 4540 uint32_t compute_user_data_2; /* ordinal67 */ 4541 uint32_t compute_user_data_3; /* ordinal68 */ 4542 uint32_t compute_user_data_4; /* ordinal69 */ 4543 uint32_t compute_user_data_5; /* ordinal70 */ 4544 uint32_t compute_user_data_6; /* ordinal71 */ 4545 uint32_t compute_user_data_7; /* ordinal72 */ 4546 uint32_t compute_user_data_8; /* ordinal73 */ 4547 uint32_t compute_user_data_9; /* ordinal74 */ 4548 uint32_t compute_user_data_10; /* ordinal75 */ 4549 uint32_t compute_user_data_11; /* ordinal76 */ 4550 uint32_t compute_user_data_12; /* ordinal77 */ 4551 uint32_t compute_user_data_13; /* ordinal78 */ 4552 uint32_t compute_user_data_14; /* ordinal79 */ 4553 uint32_t compute_user_data_15; /* ordinal80 */ 4554 uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */ 4555 uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */ 4556 uint32_t reserved35; /* ordinal83 */ 4557 uint32_t reserved36; /* ordinal84 */ 4558 uint32_t reserved37; /* ordinal85 */ 4559 uint32_t cp_mqd_query_time_lo; /* ordinal86 */ 4560 uint32_t cp_mqd_query_time_hi; /* ordinal87 */ 4561 uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */ 4562 uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */ 4563 uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */ 4564 uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */ 4565 uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */ 4566 uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */ 4567 uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */ 4568 uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */ 4569 uint32_t reserved38; /* ordinal96 */ 4570 uint32_t reserved39; /* ordinal97 */ 4571 uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */ 4572 uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */ 4573 uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */ 4574 uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */ 4575 
uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */ 4576 uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */ 4577 uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */ 4578 uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */ 4579 uint32_t reserved40; /* ordinal106 */ 4580 uint32_t reserved41; /* ordinal107 */ 4581 uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */ 4582 uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */ 4583 uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */ 4584 uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */ 4585 uint32_t reserved42; /* ordinal112 */ 4586 uint32_t reserved43; /* ordinal113 */ 4587 uint32_t cp_pq_exe_status_lo; /* ordinal114 */ 4588 uint32_t cp_pq_exe_status_hi; /* ordinal115 */ 4589 uint32_t cp_packet_id_lo; /* ordinal116 */ 4590 uint32_t cp_packet_id_hi; /* ordinal117 */ 4591 uint32_t cp_packet_exe_status_lo; /* ordinal118 */ 4592 uint32_t cp_packet_exe_status_hi; /* ordinal119 */ 4593 uint32_t gds_save_base_addr_lo; /* ordinal120 */ 4594 uint32_t gds_save_base_addr_hi; /* ordinal121 */ 4595 uint32_t gds_save_mask_lo; /* ordinal122 */ 4596 uint32_t gds_save_mask_hi; /* ordinal123 */ 4597 uint32_t ctx_save_base_addr_lo; /* ordinal124 */ 4598 uint32_t ctx_save_base_addr_hi; /* ordinal125 */ 4599 uint32_t reserved44; /* ordinal126 */ 4600 uint32_t reserved45; /* ordinal127 */ 4601 uint32_t cp_mqd_base_addr_lo; /* ordinal128 */ 4602 uint32_t cp_mqd_base_addr_hi; /* ordinal129 */ 4603 uint32_t cp_hqd_active; /* ordinal130 */ 4604 uint32_t cp_hqd_vmid; /* ordinal131 */ 4605 uint32_t cp_hqd_persistent_state; /* ordinal132 */ 4606 uint32_t cp_hqd_pipe_priority; /* ordinal133 */ 4607 uint32_t cp_hqd_queue_priority; /* ordinal134 */ 4608 uint32_t cp_hqd_quantum; /* ordinal135 */ 4609 uint32_t cp_hqd_pq_base_lo; /* ordinal136 */ 4610 uint32_t cp_hqd_pq_base_hi; /* ordinal137 */ 4611 uint32_t cp_hqd_pq_rptr; /* ordinal138 */ 4612 uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */ 4613 uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */ 4614 uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */ 4615 uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */ 4616 uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */ 4617 uint32_t cp_hqd_pq_wptr; /* ordinal144 */ 4618 uint32_t cp_hqd_pq_control; /* ordinal145 */ 4619 uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */ 4620 uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */ 4621 uint32_t cp_hqd_ib_rptr; /* ordinal148 */ 4622 uint32_t cp_hqd_ib_control; /* ordinal149 */ 4623 uint32_t cp_hqd_iq_timer; /* ordinal150 */ 4624 uint32_t cp_hqd_iq_rptr; /* ordinal151 */ 4625 uint32_t cp_hqd_dequeue_request; /* ordinal152 */ 4626 uint32_t cp_hqd_dma_offload; /* ordinal153 */ 4627 uint32_t cp_hqd_sema_cmd; /* ordinal154 */ 4628 uint32_t cp_hqd_msg_type; /* ordinal155 */ 4629 uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */ 4630 uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */ 4631 uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */ 4632 uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */ 4633 uint32_t cp_hqd_hq_status0; /* ordinal160 */ 4634 uint32_t cp_hqd_hq_control0; /* ordinal161 */ 4635 uint32_t cp_mqd_control; /* ordinal162 */ 4636 uint32_t cp_hqd_hq_status1; /* ordinal163 */ 4637 uint32_t cp_hqd_hq_control1; /* ordinal164 */ 4638 uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */ 4639 uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */ 4640 uint32_t cp_hqd_eop_control; /* ordinal167 */ 4641 uint32_t cp_hqd_eop_rptr; /* ordinal168 */ 4642 uint32_t cp_hqd_eop_wptr; /* ordinal169 */ 4643 uint32_t cp_hqd_eop_done_events; /* ordinal170 
*/ 4644 uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */ 4645 uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */ 4646 uint32_t cp_hqd_ctx_save_control; /* ordinal173 */ 4647 uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */ 4648 uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */ 4649 uint32_t cp_hqd_wg_state_offset; /* ordinal176 */ 4650 uint32_t cp_hqd_ctx_save_size; /* ordinal177 */ 4651 uint32_t cp_hqd_gds_resource_state; /* ordinal178 */ 4652 uint32_t cp_hqd_error; /* ordinal179 */ 4653 uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */ 4654 uint32_t cp_hqd_eop_dones; /* ordinal181 */ 4655 uint32_t reserved46; /* ordinal182 */ 4656 uint32_t reserved47; /* ordinal183 */ 4657 uint32_t reserved48; /* ordinal184 */ 4658 uint32_t reserved49; /* ordinal185 */ 4659 uint32_t reserved50; /* ordinal186 */ 4660 uint32_t reserved51; /* ordinal187 */ 4661 uint32_t reserved52; /* ordinal188 */ 4662 uint32_t reserved53; /* ordinal189 */ 4663 uint32_t reserved54; /* ordinal190 */ 4664 uint32_t reserved55; /* ordinal191 */ 4665 uint32_t iqtimer_pkt_header; /* ordinal192 */ 4666 uint32_t iqtimer_pkt_dw0; /* ordinal193 */ 4667 uint32_t iqtimer_pkt_dw1; /* ordinal194 */ 4668 uint32_t iqtimer_pkt_dw2; /* ordinal195 */ 4669 uint32_t iqtimer_pkt_dw3; /* ordinal196 */ 4670 uint32_t iqtimer_pkt_dw4; /* ordinal197 */ 4671 uint32_t iqtimer_pkt_dw5; /* ordinal198 */ 4672 uint32_t iqtimer_pkt_dw6; /* ordinal199 */ 4673 uint32_t iqtimer_pkt_dw7; /* ordinal200 */ 4674 uint32_t iqtimer_pkt_dw8; /* ordinal201 */ 4675 uint32_t iqtimer_pkt_dw9; /* ordinal202 */ 4676 uint32_t iqtimer_pkt_dw10; /* ordinal203 */ 4677 uint32_t iqtimer_pkt_dw11; /* ordinal204 */ 4678 uint32_t iqtimer_pkt_dw12; /* ordinal205 */ 4679 uint32_t iqtimer_pkt_dw13; /* ordinal206 */ 4680 uint32_t iqtimer_pkt_dw14; /* ordinal207 */ 4681 uint32_t iqtimer_pkt_dw15; /* ordinal208 */ 4682 uint32_t iqtimer_pkt_dw16; /* ordinal209 */ 4683 uint32_t iqtimer_pkt_dw17; /* ordinal210 */ 4684 uint32_t iqtimer_pkt_dw18; /* ordinal211 */ 4685 uint32_t iqtimer_pkt_dw19; /* ordinal212 */ 4686 uint32_t iqtimer_pkt_dw20; /* ordinal213 */ 4687 uint32_t iqtimer_pkt_dw21; /* ordinal214 */ 4688 uint32_t iqtimer_pkt_dw22; /* ordinal215 */ 4689 uint32_t iqtimer_pkt_dw23; /* ordinal216 */ 4690 uint32_t iqtimer_pkt_dw24; /* ordinal217 */ 4691 uint32_t iqtimer_pkt_dw25; /* ordinal218 */ 4692 uint32_t iqtimer_pkt_dw26; /* ordinal219 */ 4693 uint32_t iqtimer_pkt_dw27; /* ordinal220 */ 4694 uint32_t iqtimer_pkt_dw28; /* ordinal221 */ 4695 uint32_t iqtimer_pkt_dw29; /* ordinal222 */ 4696 uint32_t iqtimer_pkt_dw30; /* ordinal223 */ 4697 uint32_t iqtimer_pkt_dw31; /* ordinal224 */ 4698 uint32_t reserved56; /* ordinal225 */ 4699 uint32_t reserved57; /* ordinal226 */ 4700 uint32_t reserved58; /* ordinal227 */ 4701 uint32_t set_resources_header; /* ordinal228 */ 4702 uint32_t set_resources_dw1; /* ordinal229 */ 4703 uint32_t set_resources_dw2; /* ordinal230 */ 4704 uint32_t set_resources_dw3; /* ordinal231 */ 4705 uint32_t set_resources_dw4; /* ordinal232 */ 4706 uint32_t set_resources_dw5; /* ordinal233 */ 4707 uint32_t set_resources_dw6; /* ordinal234 */ 4708 uint32_t set_resources_dw7; /* ordinal235 */ 4709 uint32_t reserved59; /* ordinal236 */ 4710 uint32_t reserved60; /* ordinal237 */ 4711 uint32_t reserved61; /* ordinal238 */ 4712 uint32_t reserved62; /* ordinal239 */ 4713 uint32_t reserved63; /* ordinal240 */ 4714 uint32_t reserved64; /* ordinal241 */ 4715 uint32_t reserved65; /* ordinal242 */ 4716 uint32_t reserved66; /* ordinal243 */ 4717 uint32_t reserved67; /* 
ordinal244 */ 4718 uint32_t reserved68; /* ordinal245 */ 4719 uint32_t reserved69; /* ordinal246 */ 4720 uint32_t reserved70; /* ordinal247 */ 4721 uint32_t reserved71; /* ordinal248 */ 4722 uint32_t reserved72; /* ordinal249 */ 4723 uint32_t reserved73; /* ordinal250 */ 4724 uint32_t reserved74; /* ordinal251 */ 4725 uint32_t reserved75; /* ordinal252 */ 4726 uint32_t reserved76; /* ordinal253 */ 4727 uint32_t reserved77; /* ordinal254 */ 4728 uint32_t reserved78; /* ordinal255 */ 4729 4730 uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */ 4731 }; 4732 4733 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev) 4734 { 4735 int i, r; 4736 4737 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4738 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4739 4740 if (ring->mqd_obj) { 4741 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4742 if (unlikely(r != 0)) 4743 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); 4744 4745 amdgpu_bo_unpin(ring->mqd_obj); 4746 amdgpu_bo_unreserve(ring->mqd_obj); 4747 4748 amdgpu_bo_unref(&ring->mqd_obj); 4749 ring->mqd_obj = NULL; 4750 } 4751 } 4752 } 4753 4754 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) 4755 { 4756 int r, i, j; 4757 u32 tmp; 4758 bool use_doorbell = true; 4759 u64 hqd_gpu_addr; 4760 u64 mqd_gpu_addr; 4761 u64 eop_gpu_addr; 4762 u64 wb_gpu_addr; 4763 u32 *buf; 4764 struct vi_mqd *mqd; 4765 4766 /* init the pipes */ 4767 mutex_lock(&adev->srbm_mutex); 4768 for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) { 4769 int me = (i < 4) ? 1 : 2; 4770 int pipe = (i < 4) ? i : (i - 4); 4771 4772 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); 4773 eop_gpu_addr >>= 8; 4774 4775 vi_srbm_select(adev, me, pipe, 0, 0); 4776 4777 /* write the EOP addr */ 4778 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr); 4779 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); 4780 4781 /* set the VMID assigned */ 4782 WREG32(mmCP_HQD_VMID, 0); 4783 4784 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4785 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4786 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4787 (order_base_2(MEC_HPD_SIZE / 4) - 1)); 4788 WREG32(mmCP_HQD_EOP_CONTROL, tmp); 4789 } 4790 vi_srbm_select(adev, 0, 0, 0, 0); 4791 mutex_unlock(&adev->srbm_mutex); 4792 4793 /* init the queues. Just two for now. 
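(the wording is stale: the loop below initializes one MQD per ring for all adev->gfx.num_compute_rings rings, i.e. GFX8_NUM_COMPUTE_RINGS = 8 of them)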
*/ 4794 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4795 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4796 4797 if (ring->mqd_obj == NULL) { 4798 r = amdgpu_bo_create(adev, 4799 sizeof(struct vi_mqd), 4800 PAGE_SIZE, true, 4801 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, 4802 NULL, &ring->mqd_obj); 4803 if (r) { 4804 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); 4805 return r; 4806 } 4807 } 4808 4809 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4810 if (unlikely(r != 0)) { 4811 gfx_v8_0_cp_compute_fini(adev); 4812 return r; 4813 } 4814 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, 4815 &mqd_gpu_addr); 4816 if (r) { 4817 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); 4818 gfx_v8_0_cp_compute_fini(adev); 4819 return r; 4820 } 4821 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); 4822 if (r) { 4823 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); 4824 gfx_v8_0_cp_compute_fini(adev); 4825 return r; 4826 } 4827 4828 /* init the mqd struct */ 4829 memset(buf, 0, sizeof(struct vi_mqd)); 4830 4831 mqd = (struct vi_mqd *)buf; 4832 mqd->header = 0xC0310800; 4833 mqd->compute_pipelinestat_enable = 0x00000001; 4834 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4835 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4836 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4837 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4838 mqd->compute_misc_reserved = 0x00000003; 4839 4840 mutex_lock(&adev->srbm_mutex); 4841 vi_srbm_select(adev, ring->me, 4842 ring->pipe, 4843 ring->queue, 0); 4844 4845 /* disable wptr polling */ 4846 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); 4847 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4848 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); 4849 4850 mqd->cp_hqd_eop_base_addr_lo = 4851 RREG32(mmCP_HQD_EOP_BASE_ADDR); 4852 mqd->cp_hqd_eop_base_addr_hi = 4853 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI); 4854 4855 /* enable doorbell? 
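(whichever state is chosen is also cached in mqd->cp_hqd_pq_doorbell_control below, so the CP can restore the doorbell setting together with the rest of the MQD)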
*/ 4856 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4857 if (use_doorbell) { 4858 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 4859 } else { 4860 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); 4861 } 4862 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); 4863 mqd->cp_hqd_pq_doorbell_control = tmp; 4864 4865 /* disable the queue if it's active */ 4866 mqd->cp_hqd_dequeue_request = 0; 4867 mqd->cp_hqd_pq_rptr = 0; 4868 mqd->cp_hqd_pq_wptr = 0; 4869 if (RREG32(mmCP_HQD_ACTIVE) & 1) { 4870 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); 4871 for (j = 0; j < adev->usec_timeout; j++) { 4872 if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) 4873 break; 4874 udelay(1); 4875 } 4876 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); 4877 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); 4878 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 4879 } 4880 4881 /* set the pointer to the MQD */ 4882 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; 4883 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); 4884 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); 4885 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 4886 4887 /* set MQD vmid to 0 */ 4888 tmp = RREG32(mmCP_MQD_CONTROL); 4889 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4890 WREG32(mmCP_MQD_CONTROL, tmp); 4891 mqd->cp_mqd_control = tmp; 4892 4893 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */ 4894 hqd_gpu_addr = ring->gpu_addr >> 8; 4895 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4896 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4897 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); 4898 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); 4899 4900 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4901 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4902 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4903 (order_base_2(ring->ring_size / 4) - 1)); 4904 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4905 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4906 #ifdef __BIG_ENDIAN 4907 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4908 #endif 4909 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4910 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4911 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4912 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4913 WREG32(mmCP_HQD_PQ_CONTROL, tmp); 4914 mqd->cp_hqd_pq_control = tmp; 4915 4916 /* set the wb address whether it's enabled or not */ 4917 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4918 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4919 mqd->cp_hqd_pq_rptr_report_addr_hi = 4920 upper_32_bits(wb_gpu_addr) & 0xffff; 4921 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, 4922 mqd->cp_hqd_pq_rptr_report_addr_lo); 4923 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4924 mqd->cp_hqd_pq_rptr_report_addr_hi); 4925 4926 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4927 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4928 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; 4929 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4930 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr); 4931 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 4932 mqd->cp_hqd_pq_wptr_poll_addr_hi); 4933 4934 /* enable the doorbell if requested */ 4935 if (use_doorbell) { 4936 if ((adev->asic_type == CHIP_CARRIZO) || 4937 (adev->asic_type == CHIP_FIJI) || 4938 (adev->asic_type == CHIP_STONEY) || 4939
(adev->asic_type == CHIP_POLARIS11) || 4940 (adev->asic_type == CHIP_POLARIS10)) { 4941 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, 4942 AMDGPU_DOORBELL_KIQ << 2); 4943 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, 4944 AMDGPU_DOORBELL_MEC_RING7 << 2); 4945 } 4946 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4947 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4948 DOORBELL_OFFSET, ring->doorbell_index); 4949 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 4950 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); 4951 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); 4952 mqd->cp_hqd_pq_doorbell_control = tmp; 4953 4954 } else { 4955 mqd->cp_hqd_pq_doorbell_control = 0; 4956 } 4957 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 4958 mqd->cp_hqd_pq_doorbell_control); 4959 4960 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4961 ring->wptr = 0; 4962 mqd->cp_hqd_pq_wptr = ring->wptr; 4963 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 4964 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4965 4966 /* set the vmid for the queue */ 4967 mqd->cp_hqd_vmid = 0; 4968 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); 4969 4970 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4971 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4972 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); 4973 mqd->cp_hqd_persistent_state = tmp; 4974 if (adev->asic_type == CHIP_STONEY || 4975 adev->asic_type == CHIP_POLARIS11 || 4976 adev->asic_type == CHIP_POLARIS10) { 4977 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); 4978 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); 4979 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); 4980 } 4981 4982 /* activate the queue */ 4983 mqd->cp_hqd_active = 1; 4984 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); 4985 4986 vi_srbm_select(adev, 0, 0, 0, 0); 4987 mutex_unlock(&adev->srbm_mutex); 4988 4989 amdgpu_bo_kunmap(ring->mqd_obj); 4990 amdgpu_bo_unreserve(ring->mqd_obj); 4991 } 4992 4993 if (use_doorbell) { 4994 tmp = RREG32(mmCP_PQ_STATUS); 4995 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4996 WREG32(mmCP_PQ_STATUS, tmp); 4997 } 4998 4999 gfx_v8_0_cp_compute_enable(adev, true); 5000 5001 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5002 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5003 5004 ring->ready = true; 5005 r = amdgpu_ring_test_ring(ring); 5006 if (r) 5007 ring->ready = false; 5008 } 5009 5010 return 0; 5011 } 5012 5013 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 5014 { 5015 int r; 5016 5017 if (!(adev->flags & AMD_IS_APU)) 5018 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5019 5020 if (!adev->pp_enabled) { 5021 if (!adev->firmware.smu_load) { 5022 /* legacy firmware loading */ 5023 r = gfx_v8_0_cp_gfx_load_microcode(adev); 5024 if (r) 5025 return r; 5026 5027 r = gfx_v8_0_cp_compute_load_microcode(adev); 5028 if (r) 5029 return r; 5030 } else { 5031 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5032 AMDGPU_UCODE_ID_CP_CE); 5033 if (r) 5034 return -EINVAL; 5035 5036 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5037 AMDGPU_UCODE_ID_CP_PFP); 5038 if (r) 5039 return -EINVAL; 5040 5041 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5042 AMDGPU_UCODE_ID_CP_ME); 5043 if (r) 5044 return -EINVAL; 5045 5046 if (adev->asic_type == CHIP_TOPAZ) { 5047 r = gfx_v8_0_cp_compute_load_microcode(adev); 5048 if (r) 5049 return r; 5050 } else { 5051 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5052 AMDGPU_UCODE_ID_CP_MEC1); 5053 if (r) 5054 return 
-EINVAL; 5055 } 5056 } 5057 } 5058 5059 r = gfx_v8_0_cp_gfx_resume(adev); 5060 if (r) 5061 return r; 5062 5063 r = gfx_v8_0_cp_compute_resume(adev); 5064 if (r) 5065 return r; 5066 5067 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5068 5069 return 0; 5070 } 5071 5072 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 5073 { 5074 gfx_v8_0_cp_gfx_enable(adev, enable); 5075 gfx_v8_0_cp_compute_enable(adev, enable); 5076 } 5077 5078 static int gfx_v8_0_hw_init(void *handle) 5079 { 5080 int r; 5081 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5082 5083 gfx_v8_0_init_golden_registers(adev); 5084 gfx_v8_0_gpu_init(adev); 5085 5086 r = gfx_v8_0_rlc_resume(adev); 5087 if (r) 5088 return r; 5089 5090 r = gfx_v8_0_cp_resume(adev); 5091 5092 return r; 5093 } 5094 5095 static int gfx_v8_0_hw_fini(void *handle) 5096 { 5097 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5098 5099 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5100 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5101 gfx_v8_0_cp_enable(adev, false); 5102 gfx_v8_0_rlc_stop(adev); 5103 gfx_v8_0_cp_compute_fini(adev); 5104 5105 amdgpu_set_powergating_state(adev, 5106 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); 5107 5108 return 0; 5109 } 5110 5111 static int gfx_v8_0_suspend(void *handle) 5112 { 5113 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5114 5115 return gfx_v8_0_hw_fini(adev); 5116 } 5117 5118 static int gfx_v8_0_resume(void *handle) 5119 { 5120 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5121 5122 return gfx_v8_0_hw_init(adev); 5123 } 5124 5125 static bool gfx_v8_0_is_idle(void *handle) 5126 { 5127 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5128 5129 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5130 return false; 5131 else 5132 return true; 5133 } 5134 5135 static int gfx_v8_0_wait_for_idle(void *handle) 5136 { 5137 unsigned i; 5138 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5139 5140 for (i = 0; i < adev->usec_timeout; i++) { 5141 if (gfx_v8_0_is_idle(handle)) 5142 return 0; 5143 5144 udelay(1); 5145 } 5146 return -ETIMEDOUT; 5147 } 5148 5149 static bool gfx_v8_0_check_soft_reset(void *handle) 5150 { 5151 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5152 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5153 u32 tmp; 5154 5155 /* GRBM_STATUS */ 5156 tmp = RREG32(mmGRBM_STATUS); 5157 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5158 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5159 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5160 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5161 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5162 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5163 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 5164 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5165 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 5166 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5167 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 5168 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5169 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5170 } 5171 5172 /* GRBM_STATUS2 */ 5173 tmp = RREG32(mmGRBM_STATUS2); 5174 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 5175 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5176 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 5177 5178 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 5179 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 5180 REG_GET_FIELD(tmp, 
GRBM_STATUS2, CPG_BUSY)) { 5181 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5182 SOFT_RESET_CPF, 1); 5183 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5184 SOFT_RESET_CPC, 1); 5185 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5186 SOFT_RESET_CPG, 1); 5187 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 5188 SOFT_RESET_GRBM, 1); 5189 } 5190 5191 /* SRBM_STATUS */ 5192 tmp = RREG32(mmSRBM_STATUS); 5193 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 5194 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5195 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5196 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 5197 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5198 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5199 5200 if (grbm_soft_reset || srbm_soft_reset) { 5201 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5202 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5203 return true; 5204 } else { 5205 adev->gfx.grbm_soft_reset = 0; 5206 adev->gfx.srbm_soft_reset = 0; 5207 return false; 5208 } 5209 } 5210 5211 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev, 5212 struct amdgpu_ring *ring) 5213 { 5214 int i; 5215 5216 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5217 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 5218 u32 tmp; 5219 tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); 5220 tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST, 5221 DEQUEUE_REQ, 2); 5222 WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp); 5223 for (i = 0; i < adev->usec_timeout; i++) { 5224 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 5225 break; 5226 udelay(1); 5227 } 5228 } 5229 } 5230 5231 static int gfx_v8_0_pre_soft_reset(void *handle) 5232 { 5233 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5234 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5235 5236 if ((!adev->gfx.grbm_soft_reset) && 5237 (!adev->gfx.srbm_soft_reset)) 5238 return 0; 5239 5240 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5241 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5242 5243 /* stop the rlc */ 5244 gfx_v8_0_rlc_stop(adev); 5245 5246 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5247 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5248 /* Disable GFX parsing/prefetching */ 5249 gfx_v8_0_cp_gfx_enable(adev, false); 5250 5251 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5252 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5253 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5254 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5255 int i; 5256 5257 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5258 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5259 5260 gfx_v8_0_inactive_hqd(adev, ring); 5261 } 5262 /* Disable MEC parsing/prefetching */ 5263 gfx_v8_0_cp_compute_enable(adev, false); 5264 } 5265 5266 return 0; 5267 } 5268 5269 static int gfx_v8_0_soft_reset(void *handle) 5270 { 5271 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5272 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5273 u32 tmp; 5274 5275 if ((!adev->gfx.grbm_soft_reset) && 5276 (!adev->gfx.srbm_soft_reset)) 5277 return 0; 5278 5279 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5280 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5281 5282 if (grbm_soft_reset || srbm_soft_reset) { 5283 tmp = RREG32(mmGMCON_DEBUG); 5284 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5285 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 
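/* GFX_STALL and GFX_CLEAR are set here and cleared again once the resets below have been released; judging by the register names this quiesces and flushes GFX traffic in the memory controller around the soft reset. */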
5286 WREG32(mmGMCON_DEBUG, tmp); 5287 udelay(50); 5288 } 5289 5290 if (grbm_soft_reset) { 5291 tmp = RREG32(mmGRBM_SOFT_RESET); 5292 tmp |= grbm_soft_reset; 5293 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5294 WREG32(mmGRBM_SOFT_RESET, tmp); 5295 tmp = RREG32(mmGRBM_SOFT_RESET); 5296 5297 udelay(50); 5298 5299 tmp &= ~grbm_soft_reset; 5300 WREG32(mmGRBM_SOFT_RESET, tmp); 5301 tmp = RREG32(mmGRBM_SOFT_RESET); 5302 } 5303 5304 if (srbm_soft_reset) { 5305 tmp = RREG32(mmSRBM_SOFT_RESET); 5306 tmp |= srbm_soft_reset; 5307 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5308 WREG32(mmSRBM_SOFT_RESET, tmp); 5309 tmp = RREG32(mmSRBM_SOFT_RESET); 5310 5311 udelay(50); 5312 5313 tmp &= ~srbm_soft_reset; 5314 WREG32(mmSRBM_SOFT_RESET, tmp); 5315 tmp = RREG32(mmSRBM_SOFT_RESET); 5316 } 5317 5318 if (grbm_soft_reset || srbm_soft_reset) { 5319 tmp = RREG32(mmGMCON_DEBUG); 5320 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5321 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5322 WREG32(mmGMCON_DEBUG, tmp); 5323 } 5324 5325 /* Wait a little for things to settle down */ 5326 udelay(50); 5327 5328 return 0; 5329 } 5330 5331 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev, 5332 struct amdgpu_ring *ring) 5333 { 5334 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5335 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 5336 WREG32(mmCP_HQD_PQ_RPTR, 0); 5337 WREG32(mmCP_HQD_PQ_WPTR, 0); 5338 vi_srbm_select(adev, 0, 0, 0, 0); 5339 } 5340 5341 static int gfx_v8_0_post_soft_reset(void *handle) 5342 { 5343 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5344 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5345 5346 if ((!adev->gfx.grbm_soft_reset) && 5347 (!adev->gfx.srbm_soft_reset)) 5348 return 0; 5349 5350 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5351 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5352 5353 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5354 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5355 gfx_v8_0_cp_gfx_resume(adev); 5356 5357 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5358 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5359 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5360 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5361 int i; 5362 5363 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5364 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5365 5366 gfx_v8_0_init_hqd(adev, ring); 5367 } 5368 gfx_v8_0_cp_compute_resume(adev); 5369 } 5370 gfx_v8_0_rlc_start(adev); 5371 5372 return 0; 5373 } 5374 5375 /** 5376 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5377 * 5378 * @adev: amdgpu_device pointer 5379 * 5380 * Fetches a GPU clock counter snapshot. 5381 * Returns the 64 bit clock counter snapshot. 
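 * The write to mmRLC_CAPTURE_GPU_CLOCK_COUNT below is what latches both
 * 32-bit halves, and gpu_clock_mutex keeps concurrent callers from tearing
 * the 64-bit value.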
5382 */ 5383 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5384 { 5385 uint64_t clock; 5386 5387 mutex_lock(&adev->gfx.gpu_clock_mutex); 5388 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5389 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5390 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5391 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5392 return clock; 5393 } 5394 5395 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5396 uint32_t vmid, 5397 uint32_t gds_base, uint32_t gds_size, 5398 uint32_t gws_base, uint32_t gws_size, 5399 uint32_t oa_base, uint32_t oa_size) 5400 { 5401 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 5402 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 5403 5404 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 5405 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 5406 5407 oa_base = oa_base >> AMDGPU_OA_SHIFT; 5408 oa_size = oa_size >> AMDGPU_OA_SHIFT; 5409 5410 /* GDS Base */ 5411 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5412 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5413 WRITE_DATA_DST_SEL(0))); 5414 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5415 amdgpu_ring_write(ring, 0); 5416 amdgpu_ring_write(ring, gds_base); 5417 5418 /* GDS Size */ 5419 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5420 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5421 WRITE_DATA_DST_SEL(0))); 5422 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5423 amdgpu_ring_write(ring, 0); 5424 amdgpu_ring_write(ring, gds_size); 5425 5426 /* GWS */ 5427 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5428 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5429 WRITE_DATA_DST_SEL(0))); 5430 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5431 amdgpu_ring_write(ring, 0); 5432 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5433 5434 /* OA */ 5435 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5436 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5437 WRITE_DATA_DST_SEL(0))); 5438 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5439 amdgpu_ring_write(ring, 0); 5440 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5441 } 5442 5443 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5444 { 5445 WREG32(mmSQ_IND_INDEX, 5446 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5447 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5448 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5449 (SQ_IND_INDEX__FORCE_READ_MASK)); 5450 return RREG32(mmSQ_IND_DATA); 5451 } 5452 5453 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5454 { 5455 /* type 0 wave data */ 5456 dst[(*no_fields)++] = 0; 5457 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5458 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5459 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 5460 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5461 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5462 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5463 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5464 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5465 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5466 dst[(*no_fields)++] = 
wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5467 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5468 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5469 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5470 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5471 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5472 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5473 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5474 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5475 } 5476 5477 5478 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5479 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5480 .select_se_sh = &gfx_v8_0_select_se_sh, 5481 .read_wave_data = &gfx_v8_0_read_wave_data, 5482 }; 5483 5484 static int gfx_v8_0_early_init(void *handle) 5485 { 5486 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5487 5488 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5489 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS; 5490 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5491 gfx_v8_0_set_ring_funcs(adev); 5492 gfx_v8_0_set_irq_funcs(adev); 5493 gfx_v8_0_set_gds_init(adev); 5494 gfx_v8_0_set_rlc_funcs(adev); 5495 5496 return 0; 5497 } 5498 5499 static int gfx_v8_0_late_init(void *handle) 5500 { 5501 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5502 int r; 5503 5504 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5505 if (r) 5506 return r; 5507 5508 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5509 if (r) 5510 return r; 5511 5512 /* requires IBs so do in late init after IB pool is initialized */ 5513 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5514 if (r) 5515 return r; 5516 5517 amdgpu_set_powergating_state(adev, 5518 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); 5519 5520 return 0; 5521 } 5522 5523 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5524 bool enable) 5525 { 5526 if (adev->asic_type == CHIP_POLARIS11) 5527 /* Send msg to SMU via Powerplay */ 5528 amdgpu_set_powergating_state(adev, 5529 AMD_IP_BLOCK_TYPE_SMC, 5530 enable ? 5531 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); 5532 5533 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5534 } 5535 5536 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5537 bool enable) 5538 { 5539 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0); 5540 } 5541 5542 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5543 bool enable) 5544 { 5545 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5546 } 5547 5548 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5549 bool enable) 5550 { 5551 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5552 } 5553 5554 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5555 bool enable) 5556 { 5557 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5558 5559 /* Read any GFX register to wake up GFX. 
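(this only happens on the disable path: the dummy read of mmDB_RENDER_CONTROL below forces the gated block awake so that later register accesses land)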
*/ 5560 if (!enable) 5561 RREG32(mmDB_RENDER_CONTROL); 5562 } 5563 5564 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5565 bool enable) 5566 { 5567 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5568 cz_enable_gfx_cg_power_gating(adev, true); 5569 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5570 cz_enable_gfx_pipeline_power_gating(adev, true); 5571 } else { 5572 cz_enable_gfx_cg_power_gating(adev, false); 5573 cz_enable_gfx_pipeline_power_gating(adev, false); 5574 } 5575 } 5576 5577 static int gfx_v8_0_set_powergating_state(void *handle, 5578 enum amd_powergating_state state) 5579 { 5580 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5581 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 5582 5583 if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 5584 return 0; 5585 5586 switch (adev->asic_type) { 5587 case CHIP_CARRIZO: 5588 case CHIP_STONEY: 5589 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) 5590 cz_update_gfx_cg_power_gating(adev, enable); 5591 5592 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5593 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5594 else 5595 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5596 5597 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5598 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5599 else 5600 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5601 break; 5602 case CHIP_POLARIS11: 5603 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5604 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5605 else 5606 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5607 5608 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5609 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5610 else 5611 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5612 5613 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) 5614 polaris11_enable_gfx_quick_mg_power_gating(adev, true); 5615 else 5616 polaris11_enable_gfx_quick_mg_power_gating(adev, false); 5617 break; 5618 default: 5619 break; 5620 } 5621 5622 return 0; 5623 } 5624 5625 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5626 uint32_t reg_addr, uint32_t cmd) 5627 { 5628 uint32_t data; 5629 5630 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5631 5632 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5633 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5634 5635 data = RREG32(mmRLC_SERDES_WR_CTRL); 5636 if (adev->asic_type == CHIP_STONEY) 5637 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5638 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5639 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5640 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5641 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5642 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5643 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5644 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5645 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5646 else 5647 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5648 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5649 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5650 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5651 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5652 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5653 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5654 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5655 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5656 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5657 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5658 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5659 (cmd << 
RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5660 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5661 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5662 5663 WREG32(mmRLC_SERDES_WR_CTRL, data); 5664 } 5665 5666 #define MSG_ENTER_RLC_SAFE_MODE 1 5667 #define MSG_EXIT_RLC_SAFE_MODE 0 5668 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5669 #define RLC_GPR_REG2__REQ__SHIFT 0 5670 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5671 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5672 5673 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev) 5674 { 5675 u32 data = 0; 5676 unsigned i; 5677 5678 data = RREG32(mmRLC_CNTL); 5679 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) 5680 return; 5681 5682 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || 5683 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | 5684 AMD_PG_SUPPORT_GFX_DMG))) { 5685 data |= RLC_GPR_REG2__REQ_MASK; 5686 data &= ~RLC_GPR_REG2__MESSAGE_MASK; 5687 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); 5688 WREG32(mmRLC_GPR_REG2, data); 5689 5690 for (i = 0; i < adev->usec_timeout; i++) { 5691 if ((RREG32(mmRLC_GPM_STAT) & 5692 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5693 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5694 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5695 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5696 break; 5697 udelay(1); 5698 } 5699 5700 for (i = 0; i < adev->usec_timeout; i++) { 5701 if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ)) 5702 break; 5703 udelay(1); 5704 } 5705 adev->gfx.rlc.in_safe_mode = true; 5706 } 5707 } 5708 5709 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev) 5710 { 5711 u32 data; 5712 unsigned i; 5713 5714 data = RREG32(mmRLC_CNTL); 5715 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) 5716 return; 5717 5718 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || 5719 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | 5720 AMD_PG_SUPPORT_GFX_DMG))) { 5721 data |= RLC_GPR_REG2__REQ_MASK; 5722 data &= ~RLC_GPR_REG2__MESSAGE_MASK; 5723 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); 5724 WREG32(mmRLC_GPR_REG2, data); 5725 adev->gfx.rlc.in_safe_mode = false; 5726 } 5727 5728 for (i = 0; i < adev->usec_timeout; i++) { 5729 if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ)) 5730 break; 5731 udelay(1); 5732 } 5733 } 5734 5735 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5736 { 5737 u32 data; 5738 unsigned i; 5739 5740 data = RREG32(mmRLC_CNTL); 5741 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5742 return; 5743 5744 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5745 data |= RLC_SAFE_MODE__CMD_MASK; 5746 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5747 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5748 WREG32(mmRLC_SAFE_MODE, data); 5749 5750 for (i = 0; i < adev->usec_timeout; i++) { 5751 if ((RREG32(mmRLC_GPM_STAT) & 5752 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5753 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5754 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5755 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5756 break; 5757 udelay(1); 5758 } 5759 5760 for (i = 0; i < adev->usec_timeout; i++) { 5761 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5762 break; 5763 udelay(1); 5764 } 5765 adev->gfx.rlc.in_safe_mode = true; 5766 } 5767 } 5768 5769 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5770 { 5771 u32 data = 0; 5772 unsigned i; 5773 5774 data = RREG32(mmRLC_CNTL); 5775 if (!(data & 
RLC_CNTL__RLC_ENABLE_F32_MASK)) 5776 return; 5777 5778 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5779 if (adev->gfx.rlc.in_safe_mode) { 5780 data |= RLC_SAFE_MODE__CMD_MASK; 5781 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5782 WREG32(mmRLC_SAFE_MODE, data); 5783 adev->gfx.rlc.in_safe_mode = false; 5784 } 5785 } 5786 5787 for (i = 0; i < adev->usec_timeout; i++) { 5788 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5789 break; 5790 udelay(1); 5791 } 5792 } 5793 5794 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev) 5795 { 5796 adev->gfx.rlc.in_safe_mode = true; 5797 } 5798 5799 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev) 5800 { 5801 adev->gfx.rlc.in_safe_mode = false; 5802 } 5803 5804 static const struct amdgpu_rlc_funcs cz_rlc_funcs = { 5805 .enter_safe_mode = cz_enter_rlc_safe_mode, 5806 .exit_safe_mode = cz_exit_rlc_safe_mode 5807 }; 5808 5809 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5810 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5811 .exit_safe_mode = iceland_exit_rlc_safe_mode 5812 }; 5813 5814 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = { 5815 .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode, 5816 .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode 5817 }; 5818 5819 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5820 bool enable) 5821 { 5822 uint32_t temp, data; 5823 5824 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5825 5826 /* It is disabled by HW by default */ 5827 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5828 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5829 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 5830 /* 1 - RLC memory Light sleep */ 5831 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 5832 5833 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 5834 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 5835 } 5836 5837 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5838 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5839 if (adev->flags & AMD_IS_APU) 5840 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5841 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5842 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5843 else 5844 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5845 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5846 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5847 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5848 5849 if (temp != data) 5850 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5851 5852 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5853 gfx_v8_0_wait_for_rlc_serdes(adev); 5854 5855 /* 5 - clear mgcg override */ 5856 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5857 5858 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5859 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 5860 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5861 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5862 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5863 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 5864 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 5865 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 5866 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 5867 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 5868 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 5869 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 5870 if (temp != data) 5871 WREG32(mmCGTS_SM_CTRL_REG, data); 5872 } 5873 udelay(50); 5874 5875 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5876 gfx_v8_0_wait_for_rlc_serdes(adev); 5877 } 
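/* disable path: roughly the inverse of the enable sequence above */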
else { 5878 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 5879 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5880 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5881 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5882 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5883 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5884 if (temp != data) 5885 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5886 5887 /* 2 - disable MGLS in RLC */ 5888 data = RREG32(mmRLC_MEM_SLP_CNTL); 5889 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5890 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5891 WREG32(mmRLC_MEM_SLP_CNTL, data); 5892 } 5893 5894 /* 3 - disable MGLS in CP */ 5895 data = RREG32(mmCP_MEM_SLP_CNTL); 5896 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5897 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5898 WREG32(mmCP_MEM_SLP_CNTL, data); 5899 } 5900 5901 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 5902 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5903 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 5904 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 5905 if (temp != data) 5906 WREG32(mmCGTS_SM_CTRL_REG, data); 5907 5908 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5909 gfx_v8_0_wait_for_rlc_serdes(adev); 5910 5911 /* 6 - set mgcg override */ 5912 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5913 5914 udelay(50); 5915 5916 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5917 gfx_v8_0_wait_for_rlc_serdes(adev); 5918 } 5919 5920 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5921 } 5922 5923 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5924 bool enable) 5925 { 5926 uint32_t temp, temp1, data, data1; 5927 5928 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5929 5930 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5931 5932 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5933 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/ 5934 * Cmp_busy/GFX_Idle interrupts 5935 */ 5936 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5937 5938 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5939 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 5940 if (temp1 != data1) 5941 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5942 5943 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5944 gfx_v8_0_wait_for_rlc_serdes(adev); 5945 5946 /* 3 - clear cgcg override */ 5947 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5948 5949 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5950 gfx_v8_0_wait_for_rlc_serdes(adev); 5951 5952 /* 4 - write cmd to set CGLS */ 5953 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 5954 5955 /* 5 - enable cgcg */ 5956 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5957 5958 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5959 /* enable cgls*/ 5960 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5961 5962 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5963 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 5964 5965 if (temp1 != data1) 5966 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5967 } else { 5968 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5969 } 5970 5971 if (temp != data) 5972 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5973 } else { 5974 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 5975 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5976 5977 /* TEST CGCG */ 5978 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5979 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 5980 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 5981 if (temp1 != data1) 5982 
WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5983 5984 /* read gfx register to wake up cgcg */ 5985 RREG32(mmCB_CGTT_SCLK_CTRL); 5986 RREG32(mmCB_CGTT_SCLK_CTRL); 5987 RREG32(mmCB_CGTT_SCLK_CTRL); 5988 RREG32(mmCB_CGTT_SCLK_CTRL); 5989 5990 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5991 gfx_v8_0_wait_for_rlc_serdes(adev); 5992 5993 /* write cmd to Set CGCG Override */ 5994 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5995 5996 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5997 gfx_v8_0_wait_for_rlc_serdes(adev); 5998 5999 /* write cmd to Clear CGLS */ 6000 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 6001 6002 /* disable cgcg, cgls should be disabled too. */ 6003 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 6004 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 6005 if (temp != data) 6006 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 6007 } 6008 6009 gfx_v8_0_wait_for_rlc_serdes(adev); 6010 6011 adev->gfx.rlc.funcs->exit_safe_mode(adev); 6012 } 6013 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 6014 bool enable) 6015 { 6016 if (enable) { 6017 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 6018 * === MGCG + MGLS + TS(CG/LS) === 6019 */ 6020 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6021 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6022 } else { 6023 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 6024 * === CGCG + CGLS === 6025 */ 6026 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6027 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6028 } 6029 return 0; 6030 } 6031 6032 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 6033 enum amd_clockgating_state state) 6034 { 6035 uint32_t msg_id, pp_state; 6036 void *pp_handle = adev->powerplay.pp_handle; 6037 6038 if (state == AMD_CG_STATE_UNGATE) 6039 pp_state = 0; 6040 else 6041 pp_state = PP_STATE_CG | PP_STATE_LS; 6042 6043 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6044 PP_BLOCK_GFX_CG, 6045 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS, 6046 pp_state); 6047 amd_set_clockgating_by_smu(pp_handle, msg_id); 6048 6049 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6050 PP_BLOCK_GFX_MG, 6051 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS, 6052 pp_state); 6053 amd_set_clockgating_by_smu(pp_handle, msg_id); 6054 6055 return 0; 6056 } 6057 6058 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 6059 enum amd_clockgating_state state) 6060 { 6061 uint32_t msg_id, pp_state; 6062 void *pp_handle = adev->powerplay.pp_handle; 6063 6064 if (state == AMD_CG_STATE_UNGATE) 6065 pp_state = 0; 6066 else 6067 pp_state = PP_STATE_CG | PP_STATE_LS; 6068 6069 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6070 PP_BLOCK_GFX_CG, 6071 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS, 6072 pp_state); 6073 amd_set_clockgating_by_smu(pp_handle, msg_id); 6074 6075 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6076 PP_BLOCK_GFX_3D, 6077 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS, 6078 pp_state); 6079 amd_set_clockgating_by_smu(pp_handle, msg_id); 6080 6081 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6082 PP_BLOCK_GFX_MG, 6083 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS, 6084 pp_state); 6085 amd_set_clockgating_by_smu(pp_handle, msg_id); 6086 6087 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6088 PP_BLOCK_GFX_RLC, 6089 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS, 6090 pp_state); 6091 amd_set_clockgating_by_smu(pp_handle, msg_id); 6092 6093 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6094 PP_BLOCK_GFX_CP, 6095 PP_STATE_SUPPORT_CG | 
PP_STATE_SUPPORT_LS, 6096 pp_state); 6097 amd_set_clockgating_by_smu(pp_handle, msg_id); 6098 6099 return 0; 6100 } 6101 6102 static int gfx_v8_0_set_clockgating_state(void *handle, 6103 enum amd_clockgating_state state) 6104 { 6105 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6106 6107 switch (adev->asic_type) { 6108 case CHIP_FIJI: 6109 case CHIP_CARRIZO: 6110 case CHIP_STONEY: 6111 gfx_v8_0_update_gfx_clock_gating(adev, 6112 state == AMD_CG_STATE_GATE ? true : false); 6113 break; 6114 case CHIP_TONGA: 6115 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6116 break; 6117 case CHIP_POLARIS10: 6118 case CHIP_POLARIS11: 6119 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6120 break; 6121 default: 6122 break; 6123 } 6124 return 0; 6125 } 6126 6127 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6128 { 6129 return ring->adev->wb.wb[ring->rptr_offs]; 6130 } 6131 6132 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6133 { 6134 struct amdgpu_device *adev = ring->adev; 6135 6136 if (ring->use_doorbell) 6137 /* XXX check if swapping is necessary on BE */ 6138 return ring->adev->wb.wb[ring->wptr_offs]; 6139 else 6140 return RREG32(mmCP_RB0_WPTR); 6141 } 6142 6143 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 6144 { 6145 struct amdgpu_device *adev = ring->adev; 6146 6147 if (ring->use_doorbell) { 6148 /* XXX check if swapping is necessary on BE */ 6149 adev->wb.wb[ring->wptr_offs] = ring->wptr; 6150 WDOORBELL32(ring->doorbell_index, ring->wptr); 6151 } else { 6152 WREG32(mmCP_RB0_WPTR, ring->wptr); 6153 (void)RREG32(mmCP_RB0_WPTR); 6154 } 6155 } 6156 6157 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 6158 { 6159 u32 ref_and_mask, reg_mem_engine; 6160 6161 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 6162 switch (ring->me) { 6163 case 1: 6164 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6165 break; 6166 case 2: 6167 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6168 break; 6169 default: 6170 return; 6171 } 6172 reg_mem_engine = 0; 6173 } else { 6174 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6175 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6176 } 6177 6178 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6179 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6180 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6181 reg_mem_engine)); 6182 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 6183 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6184 amdgpu_ring_write(ring, ref_and_mask); 6185 amdgpu_ring_write(ring, ref_and_mask); 6186 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6187 } 6188 6189 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) 6190 { 6191 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6192 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6193 WRITE_DATA_DST_SEL(0) | 6194 WR_CONFIRM)); 6195 amdgpu_ring_write(ring, mmHDP_DEBUG0); 6196 amdgpu_ring_write(ring, 0); 6197 amdgpu_ring_write(ring, 1); 6198 6199 } 6200 6201 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6202 struct amdgpu_ib *ib, 6203 unsigned vm_id, bool ctx_switch) 6204 { 6205 u32 header, control = 0; 6206 6207 if (ib->flags & AMDGPU_IB_FLAG_CE) 6208 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 6209 else 6210 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 6211 6212 control |= ib->length_dw | (vm_id << 24); 6213 6214 amdgpu_ring_write(ring, header); 6215 amdgpu_ring_write(ring, 6216 #ifdef __BIG_ENDIAN 6217 
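/* the low bits of the IB base address dword double as the swap mode on big-endian hosts; the value 2 below is assumed to select 32-bit swapping */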
(2 << 0) | 6218 #endif 6219 (ib->gpu_addr & 0xFFFFFFFC)); 6220 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6221 amdgpu_ring_write(ring, control); 6222 } 6223 6224 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 6225 struct amdgpu_ib *ib, 6226 unsigned vm_id, bool ctx_switch) 6227 { 6228 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); 6229 6230 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 6231 amdgpu_ring_write(ring, 6232 #ifdef __BIG_ENDIAN 6233 (2 << 0) | 6234 #endif 6235 (ib->gpu_addr & 0xFFFFFFFC)); 6236 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6237 amdgpu_ring_write(ring, control); 6238 } 6239 6240 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, 6241 u64 seq, unsigned flags) 6242 { 6243 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6244 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6245 6246 /* EVENT_WRITE_EOP - flush caches, send int */ 6247 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 6248 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6249 EOP_TC_ACTION_EN | 6250 EOP_TC_WB_ACTION_EN | 6251 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6252 EVENT_INDEX(5))); 6253 amdgpu_ring_write(ring, addr & 0xfffffffc); 6254 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 6255 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6256 amdgpu_ring_write(ring, lower_32_bits(seq)); 6257 amdgpu_ring_write(ring, upper_32_bits(seq)); 6258 6259 } 6260 6261 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 6262 { 6263 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6264 uint32_t seq = ring->fence_drv.sync_seq; 6265 uint64_t addr = ring->fence_drv.gpu_addr; 6266 6267 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6268 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ 6269 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 6270 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ 6271 amdgpu_ring_write(ring, addr & 0xfffffffc); 6272 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 6273 amdgpu_ring_write(ring, seq); 6274 amdgpu_ring_write(ring, 0xffffffff); 6275 amdgpu_ring_write(ring, 4); /* poll interval */ 6276 } 6277 6278 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 6279 unsigned vm_id, uint64_t pd_addr) 6280 { 6281 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6282 6283 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6284 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 6285 WRITE_DATA_DST_SEL(0)) | 6286 WR_CONFIRM); 6287 if (vm_id < 8) { 6288 amdgpu_ring_write(ring, 6289 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); 6290 } else { 6291 amdgpu_ring_write(ring, 6292 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); 6293 } 6294 amdgpu_ring_write(ring, 0); 6295 amdgpu_ring_write(ring, pd_addr >> 12); 6296 6297 /* bits 0-15 are the VM contexts 0-15 */ 6298 /* invalidate the cache */ 6299 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6300 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6301 WRITE_DATA_DST_SEL(0))); 6302 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 6303 amdgpu_ring_write(ring, 0); 6304 amdgpu_ring_write(ring, 1 << vm_id); 6305 6306 /* wait for the invalidate to complete */ 6307 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6308 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ 6309 WAIT_REG_MEM_FUNCTION(0) | /* always */ 6310 WAIT_REG_MEM_ENGINE(0))); /* me */ 6311 amdgpu_ring_write(ring, 
			  mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits a 128-dword NOP to keep the CE from accessing
		 * the VM before the flush has finished.
		 */
		amdgpu_ring_insert_nop(ring, 128);
	}
}

static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if a preamble is present */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still set load_ce_ram the first time a preamble is
		 * presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe.
	 * All other pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting
			 * from VI.  The interrupt can only be enabled/disabled
			 * per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 + /* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 +  /* gfx_v8_ring_emit_sb */
		3,   /* gfx_v8_ring_emit_cntxcntl */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
		break;
	case CHIP_STONEY:
	case CHIP_CARRIZO:
		adev->gfx.rlc.funcs = &cz_rlc_funcs;
		break;
	default:
		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
		break;
	}
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}
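
/*
 * Worked example for the helper below (illustrative only; the register
 * values are assumed, not taken from real hardware): with
 * adev->gfx.config.max_cu_per_sh == 8, gfx_v8_0_create_bitmask() yields
 * 0xff.  If the OR of the hardware and user INACTIVE_CUS fields reads
 * 0x03 (CUs 0 and 1 harvested or user-disabled), the helper returns
 * ~0x03 & 0xff == 0xfc, i.e. CUs 2-7 of the current shader array are
 * reported active.
 */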
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < 16; k++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
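
/*
 * Usage sketch (illustrative, not part of this file): the exported IP block
 * descriptors above are what the VI SoC code registers during early init so
 * that the amdgpu core can drive this engine through gfx_v8_0_ip_funcs
 * (hw_init/hw_fini, suspend/resume, clock- and powergating callbacks).
 * Assuming the amdgpu_ip_block_add() helper of this kernel generation, the
 * registration in vi.c looks roughly like:
 *
 *	amdgpu_ip_block_add(adev, &gfx_v8_0_ip_block);	// e.g. Tonga/Fiji
 *	amdgpu_ip_block_add(adev, &gfx_v8_1_ip_block);	// e.g. Stoney
 */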