/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)          ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)         ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)          ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)        ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)          ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)         ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)   ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)           ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK  0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK  0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD 1
#define CLE_BPM_SERDES_CMD 0

/* BPM Register Address */
enum {
        BPM_REG_CGLS_EN = 0,    /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,        /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,  /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,  /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,  /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength 14
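/*
 * Note: the MODULE_FIRMWARE() declarations below record which microcode
 * images this IP block may request, so firmware packaging tools can pick
 * them up; at runtime request_firmware() resolves each name (e.g.
 * "amdgpu/carrizo_ce.bin") against the kernel firmware search path.
 */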
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
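/*
 * Each golden-register table below is a flat list of
 * { register, and_mask, or_mask } triplets consumed three entries at a
 * time by amdgpu_program_register_sequence(): the and_mask bits are
 * cleared from the current register value and or_mask is then OR'd in,
 * an and_mask of 0xffffffff amounting to an outright register write.
 */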
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);

static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_program_register_sequence(adev,
                                                 iceland_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_iceland_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_program_register_sequence(adev,
                                                 iceland_golden_common_all,
                                                 (const u32)ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_program_register_sequence(adev,
                                                 fiji_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_fiji_a10,
                                                 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_program_register_sequence(adev,
                                                 fiji_golden_common_all,
                                                 (const u32)ARRAY_SIZE(fiji_golden_common_all));
                break;
        case CHIP_TONGA:
                amdgpu_program_register_sequence(adev,
                                                 tonga_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_tonga_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_program_register_sequence(adev,
                                                 tonga_golden_common_all,
                                                 (const u32)ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_POLARIS11:
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_polaris11_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_program_register_sequence(adev,
                                                 polaris11_golden_common_all,
                                                 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_polaris10_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_program_register_sequence(adev,
                                                 polaris10_golden_common_all,
                                                 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
                /* board-specific quirk: certain Polaris10 SKUs (matched by
                 * PCI revision and subsystem IDs) need extra setup written
                 * over the ATOM BIOS I2C channel */
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_program_register_sequence(adev,
                                                 cz_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_common_all,
                                                 (const u32)ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_program_register_sequence(adev,
                                                 stoney_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_common_all,
                                                 (const u32)ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}
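/*
 * The ring and IB tests below share one scheme: seed a scratch register
 * with 0xCAFEDEAD, have the CP write 0xDEADBEEF to it (directly from a
 * ring packet, or from an indirect buffer), then poll, or wait on a
 * fence, until the new value shows up.
 */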
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
        int i;

        adev->gfx.scratch.num_reg = 7;
        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
        for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
                adev->gfx.scratch.free[i] = true;
                adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
        }
}

static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
                          ring->idx, r);
                amdgpu_gfx_scratch_free(adev, scratch);
                return r;
        }
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < adev->usec_timeout) {
                DRM_INFO("ring test on %d succeeded in %d usecs\n",
                         ring->idx, i);
        } else {
                DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
                          ring->idx, scratch, tmp);
                r = -EINVAL;
        }
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        uint32_t scratch;
        uint32_t tmp = 0;
        long r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 256, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
                goto err1;
        }
        ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
        ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
        ib.ptr[2] = 0xDEADBEEF;
        ib.length_dw = 3;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                DRM_ERROR("amdgpu: IB test timed out.\n");
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
                goto err2;
        }
        tmp = RREG32(scratch);
        if (tmp == 0xDEADBEEF) {
                DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
                r = 0;
        } else {
                DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
                          scratch, tmp);
                r = -EINVAL;
        }
err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}
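/*
 * Microcode handling: gfx8 uses separate PFP, ME, CE, RLC and MEC images,
 * plus an optional MEC2 image on parts with a second compute microengine
 * (everything handled here except Topaz and Stoney).  Each blob starts
 * with a header that is validated and parsed for version and layout
 * information below.
 */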
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ))
                release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
        const char *chip_name;
        char fw_name[30];
        int err;
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
        const struct gfx_firmware_header_v1_0 *cp_hdr;
        const struct rlc_firmware_header_v2_0 *rlc_hdr;
        unsigned int *tmp = NULL, i;

        DRM_DEBUG("\n");

        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                chip_name = "topaz";
                break;
        case CHIP_TONGA:
                chip_name = "tonga";
                break;
        case CHIP_CARRIZO:
                chip_name = "carrizo";
                break;
        case CHIP_FIJI:
                chip_name = "fiji";
                break;
        case CHIP_POLARIS11:
                chip_name = "polaris11";
                break;
        case CHIP_POLARIS10:
                chip_name = "polaris10";
                break;
        case CHIP_STONEY:
                chip_name = "stoney";
                break;
        default:
                BUG();
        }

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.me_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

        adev->gfx.rlc.save_and_restore_offset =
                le32_to_cpu(rlc_hdr->save_and_restore_offset);
        adev->gfx.rlc.clear_state_descriptor_offset =
                le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
        adev->gfx.rlc.avail_scratch_ram_locations =
                le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
        adev->gfx.rlc.reg_restore_list_size =
                le32_to_cpu(rlc_hdr->reg_restore_list_size);
        adev->gfx.rlc.reg_list_format_start =
                le32_to_cpu(rlc_hdr->reg_list_format_start);
        adev->gfx.rlc.reg_list_format_separate_start =
                le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
        adev->gfx.rlc.starting_offsets_start =
                le32_to_cpu(rlc_hdr->starting_offsets_start);
        adev->gfx.rlc.reg_list_format_size_bytes =
                le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
        adev->gfx.rlc.reg_list_size_bytes =
                le32_to_cpu(rlc_hdr->reg_list_size_bytes);

        /* one allocation holds both lists: the format list first, then the
         * restore list, with register_restore pointing just past the
         * format entries */
        adev->gfx.rlc.register_list_format =
                kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
                        adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

        if (!adev->gfx.rlc.register_list_format) {
                err = -ENOMEM;
                goto out;
        }

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                               le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
        for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                               le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
        for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ)) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                if (!err) {
                        err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
                        if (err)
                                goto out;
                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                                adev->gfx.mec2_fw->data;
                        adev->gfx.mec2_fw_version =
                                le32_to_cpu(cp_hdr->header.ucode_version);
                        adev->gfx.mec2_feature_version =
                                le32_to_cpu(cp_hdr->ucode_feature_version);
                } else {
                        err = 0;
                        adev->gfx.mec2_fw = NULL;
                }
        }

        if (adev->firmware.smu_load) {
                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
                info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
                info->fw = adev->gfx.pfp_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
                info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
                info->fw = adev->gfx.me_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
                info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
                info->fw = adev->gfx.ce_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
                info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
                info->fw = adev->gfx.rlc_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
                info->fw = adev->gfx.mec_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                /* we also need to account for the JT (jump table) */
                cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

                if (amdgpu_sriov_vf(adev)) {
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
                        info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
                        info->fw = adev->gfx.mec_fw;
                        adev->firmware.fw_size +=
                                ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
                }

                if (adev->gfx.mec2_fw) {
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
                        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
                        info->fw = adev->gfx.mec2_fw;
                        header = (const struct common_firmware_header *)info->fw->data;
                        adev->firmware.fw_size +=
                                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
                }
        }

out:
        if (err) {
                dev_err(adev->dev,
                        "gfx8: Failed to load firmware \"%s\"\n",
                        fw_name);
                release_firmware(adev->gfx.pfp_fw);
                adev->gfx.pfp_fw = NULL;
                release_firmware(adev->gfx.me_fw);
                adev->gfx.me_fw = NULL;
                release_firmware(adev->gfx.ce_fw);
                adev->gfx.ce_fw = NULL;
                release_firmware(adev->gfx.rlc_fw);
                adev->gfx.rlc_fw = NULL;
                release_firmware(adev->gfx.mec_fw);
                adev->gfx.mec_fw = NULL;
                release_firmware(adev->gfx.mec2_fw);
                adev->gfx.mec2_fw = NULL;
        }
        return err;
}

static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
                                    volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (adev->gfx.rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index -
                                                PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                return;
                        }
                }
        }

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
                        PACKET3_SET_CONTEXT_REG_START);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}
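/*
 * cz_init_cp_jump_table() packs the jump table region (jt_offset/jt_size
 * dwords) of each CP microcode image back-to-back into the RLC cp_table
 * buffer; the "me" index 0-4 selects CE, PFP, ME, MEC and (on Carrizo)
 * MEC2 in turn.
 */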
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
        const __le32 *fw_data;
        volatile u32 *dst_ptr;
        int me, i, max_me = 4;
        u32 bo_offset = 0;
        u32 table_offset, table_size;

        if (adev->asic_type == CHIP_CARRIZO)
                max_me = 5;

        /* write the cp table buffer */
        dst_ptr = adev->gfx.rlc.cp_table_ptr;
        for (me = 0; me < max_me; me++) {
                if (me == 0) {
                        const struct gfx_firmware_header_v1_0 *hdr =
                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
                        fw_data = (const __le32 *)
                                (adev->gfx.ce_fw->data +
                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                        table_offset = le32_to_cpu(hdr->jt_offset);
                        table_size = le32_to_cpu(hdr->jt_size);
                } else if (me == 1) {
                        const struct gfx_firmware_header_v1_0 *hdr =
                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
                        fw_data = (const __le32 *)
                                (adev->gfx.pfp_fw->data +
                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                        table_offset = le32_to_cpu(hdr->jt_offset);
                        table_size = le32_to_cpu(hdr->jt_size);
                } else if (me == 2) {
                        const struct gfx_firmware_header_v1_0 *hdr =
                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
                        fw_data = (const __le32 *)
                                (adev->gfx.me_fw->data +
                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                        table_offset = le32_to_cpu(hdr->jt_offset);
                        table_size = le32_to_cpu(hdr->jt_size);
                } else if (me == 3) {
                        const struct gfx_firmware_header_v1_0 *hdr =
                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
                        fw_data = (const __le32 *)
                                (adev->gfx.mec_fw->data +
                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                        table_offset = le32_to_cpu(hdr->jt_offset);
                        table_size = le32_to_cpu(hdr->jt_size);
                } else if (me == 4) {
                        const struct gfx_firmware_header_v1_0 *hdr =
                                (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
                        fw_data = (const __le32 *)
                                (adev->gfx.mec2_fw->data +
                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                        table_offset = le32_to_cpu(hdr->jt_offset);
                        table_size = le32_to_cpu(hdr->jt_size);
                }

                for (i = 0; i < table_size; i++) {
                        dst_ptr[bo_offset + i] =
                                cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
                }

                bo_offset += table_size;
        }
}
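/*
 * The RLC buffer helpers below follow the usual amdgpu BO lifecycle:
 * create, reserve, pin and kmap to fill, then kunmap and unreserve;
 * teardown in gfx_v8_0_rlc_fini() unpins and drops the last reference.
 */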
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
        int r;

        /* clear state block */
        if (adev->gfx.rlc.clear_state_obj) {
                r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
                if (unlikely(r != 0))
                        dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
                amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
                amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
                amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
                adev->gfx.rlc.clear_state_obj = NULL;
        }

        /* jump table block */
        if (adev->gfx.rlc.cp_table_obj) {
                r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
                if (unlikely(r != 0))
                        dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
                amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
                amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
                amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
                adev->gfx.rlc.cp_table_obj = NULL;
        }
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
        volatile u32 *dst_ptr;
        u32 dws;
        const struct cs_section_def *cs_data;
        int r;

        adev->gfx.rlc.cs_data = vi_cs_data;

        cs_data = adev->gfx.rlc.cs_data;

        if (cs_data) {
                /* clear state block */
                adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

                if (adev->gfx.rlc.clear_state_obj == NULL) {
                        r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
                                             AMDGPU_GEM_DOMAIN_VRAM,
                                             AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
                                             AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
                                             NULL, NULL,
                                             &adev->gfx.rlc.clear_state_obj);
                        if (r) {
                                dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
                                gfx_v8_0_rlc_fini(adev);
                                return r;
                        }
                }
                r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
                if (unlikely(r != 0)) {
                        gfx_v8_0_rlc_fini(adev);
                        return r;
                }
                r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
                                  &adev->gfx.rlc.clear_state_gpu_addr);
                if (r) {
                        amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
                        dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
                        gfx_v8_0_rlc_fini(adev);
                        return r;
                }

                r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
                if (r) {
                        dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
                        gfx_v8_0_rlc_fini(adev);
                        return r;
                }
                /* set up the cs buffer */
                dst_ptr = adev->gfx.rlc.cs_ptr;
                gfx_v8_0_get_csb_buffer(adev, dst_ptr);
                amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
                amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
        }

        if ((adev->asic_type == CHIP_CARRIZO) ||
            (adev->asic_type == CHIP_STONEY)) {
                adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
                if (adev->gfx.rlc.cp_table_obj == NULL) {
                        r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
                                             AMDGPU_GEM_DOMAIN_VRAM,
                                             AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
                                             AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
                                             NULL, NULL,
                                             &adev->gfx.rlc.cp_table_obj);
                        if (r) {
                                dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
                                return r;
                        }
                }

                r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
                if (unlikely(r != 0)) {
                        dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
                        return r;
                }
                r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
                                  &adev->gfx.rlc.cp_table_gpu_addr);
                if (r) {
                        amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
                        dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
                        return r;
                }
                r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
                if (r) {
                        dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
                        return r;
                }

                cz_init_cp_jump_table(adev);

                amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
                amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
        }

        return 0;
}
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
        int r;

        if (adev->gfx.mec.hpd_eop_obj) {
                r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
                if (unlikely(r != 0))
                        dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
                amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
                amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
                amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
                adev->gfx.mec.hpd_eop_obj = NULL;
        }
}

#define MEC_HPD_SIZE 2048

static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
        int r;
        u32 *hpd;

        /*
         * we assign only 1 pipe because all other pipes will
         * be handled by KFD
         */
        adev->gfx.mec.num_mec = 1;
        adev->gfx.mec.num_pipe = 1;
        adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

        if (adev->gfx.mec.hpd_eop_obj == NULL) {
                r = amdgpu_bo_create(adev,
                                     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
                                     PAGE_SIZE, true,
                                     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
                                     &adev->gfx.mec.hpd_eop_obj);
                if (r) {
                        dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
                        return r;
                }
        }

        r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
        if (unlikely(r != 0)) {
                gfx_v8_0_mec_fini(adev);
                return r;
        }
        r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
                          &adev->gfx.mec.hpd_eop_gpu_addr);
        if (r) {
                dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
                gfx_v8_0_mec_fini(adev);
                return r;
        }
        r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
        if (r) {
                dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
                gfx_v8_0_mec_fini(adev);
                return r;
        }

        memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);

        amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
        amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

        return 0;
}

static const u32 vgpr_init_compute_shader[] =
{
        0x7e000209, 0x7e020208,
        0x7e040207, 0x7e060206,
        0x7e080205, 0x7e0a0204,
        0x7e0c0203, 0x7e0e0202,
        0x7e100201, 0x7e120200,
        0x7e140209, 0x7e160208,
        0x7e180207, 0x7e1a0206,
        0x7e1c0205, 0x7e1e0204,
        0x7e200203, 0x7e220202,
        0x7e240201, 0x7e260200,
        0x7e280209, 0x7e2a0208,
        0x7e2c0207, 0x7e2e0206,
        0x7e300205, 0x7e320204,
        0x7e340203, 0x7e360202,
        0x7e380201, 0x7e3a0200,
        0x7e3c0209, 0x7e3e0208,
        0x7e400207, 0x7e420206,
        0x7e440205, 0x7e460204,
        0x7e480203, 0x7e4a0202,
        0x7e4c0201, 0x7e4e0200,
        0x7e500209, 0x7e520208,
        0x7e540207, 0x7e560206,
        0x7e580205, 0x7e5a0204,
        0x7e5c0203, 0x7e5e0202,
        0x7e600201, 0x7e620200,
        0x7e640209, 0x7e660208,
        0x7e680207, 0x7e6a0206,
        0x7e6c0205, 0x7e6e0204,
        0x7e700203, 0x7e720202,
        0x7e740201, 0x7e760200,
        0x7e780209, 0x7e7a0208,
        0x7e7c0207, 0x7e7e0206,
        0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
        0xbe8a0100, 0xbe8c0102,
        0xbe8e0104, 0xbe900106,
        0xbe920108, 0xbe940100,
        0xbe960102, 0xbe980104,
        0xbe9a0106, 0xbe9c0108,
        0xbe9e0100, 0xbea00102,
        0xbea20104, 0xbea40106,
        0xbea60108, 0xbea80100,
        0xbeaa0102, 0xbeac0104,
        0xbeae0106, 0xbeb00108,
        0xbeb20100, 0xbeb40102,
        0xbeb60104, 0xbeb80106,
        0xbeba0108, 0xbebc0100,
        0xbebe0102, 0xbec00104,
        0xbec20106, 0xbec40108,
        0xbec60100, 0xbec80102,
        0xbee60004, 0xbee70005,
        0xbeea0006, 0xbeeb0007,
        0xbee80008, 0xbee90009,
        0xbefc0000, 0xbf8a0000,
        0xbf810000, 0x00000000,
};
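/*
 * Unlike the golden tables above, the *_init_regs arrays below are
 * { register, value } pairs: gfx_v8_0_do_edc_gpr_workarounds() walks them
 * two entries at a time and emits one PACKET3_SET_SH_REG write per pair
 * to set up its VGPR/SGPR-initialization compute dispatches.
 */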
static const u32 vgpr_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
        mmCOMPUTE_RESOURCE_LIMITS, 0,
        mmCOMPUTE_NUM_THREAD_X, 256*4,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
        mmCPC_EDC_ATC_CNT,
        mmCPC_EDC_SCRATCH_CNT,
        mmCPC_EDC_UCODE_CNT,
        mmCPF_EDC_ATC_CNT,
        mmCPF_EDC_ROQ_CNT,
        mmCPF_EDC_TAG_CNT,
        mmCPG_EDC_ATC_CNT,
        mmCPG_EDC_DMA_CNT,
        mmCPG_EDC_TAG_CNT,
        mmDC_EDC_CSINVOC_CNT,
        mmDC_EDC_RESTORE_CNT,
        mmDC_EDC_STATE_CNT,
        mmGDS_EDC_CNT,
        mmGDS_EDC_GRBM_CNT,
        mmGDS_EDC_OA_DED,
        mmSPI_EDC_CNT,
        mmSQC_ATC_EDC_GATCL1_CNT,
        mmSQC_EDC_CNT,
        mmSQ_EDC_DED_CNT,
        mmSQ_EDC_INFO,
        mmSQ_EDC_SEC_CNT,
        mmTCC_EDC_CNT,
        mmTCP_ATC_EDC_GATCL1_CNT,
        mmTCP_EDC_CNT,
        mmTD_EDC_CNT
};

static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        int r, i;
        u32 tmp;
        unsigned total_size, vgpr_offset, sgpr_offset;
        u64 gpu_addr;

        /* only supported on CZ */
        if (adev->asic_type != CHIP_CARRIZO)
                return 0;

        /* bail if the compute ring is not ready */
        if (!ring->ready)
                return 0;

        tmp = RREG32(mmGB_EDC_MODE);
        WREG32(mmGB_EDC_MODE, 0);

        /* per dispatch: 3 dwords for each {reg, value} pair, 4 for the
         * shader address write, 5 for the dispatch packet and 2 for the
         * CS partial flush; 4 bytes per dword */
        total_size =
                (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size = ALIGN(total_size, 256);
        vgpr_offset = total_size;
        total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
ALIGN(sizeof(vgpr_init_compute_shader), 256); 1594 sgpr_offset = total_size; 1595 total_size += sizeof(sgpr_init_compute_shader); 1596 1597 /* allocate an indirect buffer to put the commands in */ 1598 memset(&ib, 0, sizeof(ib)); 1599 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 1600 if (r) { 1601 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 1602 return r; 1603 } 1604 1605 /* load the compute shaders */ 1606 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 1607 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 1608 1609 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 1610 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 1611 1612 /* init the ib length to 0 */ 1613 ib.length_dw = 0; 1614 1615 /* VGPR */ 1616 /* write the register state for the compute dispatch */ 1617 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) { 1618 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1619 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START; 1620 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1]; 1621 } 1622 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1623 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 1624 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1625 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1626 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1627 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1628 1629 /* write dispatch packet */ 1630 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1631 ib.ptr[ib.length_dw++] = 8; /* x */ 1632 ib.ptr[ib.length_dw++] = 1; /* y */ 1633 ib.ptr[ib.length_dw++] = 1; /* z */ 1634 ib.ptr[ib.length_dw++] = 1635 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1636 1637 /* write CS partial flush packet */ 1638 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1639 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1640 1641 /* SGPR1 */ 1642 /* write the register state for the compute dispatch */ 1643 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) { 1644 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1645 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START; 1646 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1]; 1647 } 1648 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1649 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1650 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1651 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1652 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1653 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1654 1655 /* write dispatch packet */ 1656 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1657 ib.ptr[ib.length_dw++] = 8; /* x */ 1658 ib.ptr[ib.length_dw++] = 1; /* y */ 1659 ib.ptr[ib.length_dw++] = 1; /* z */ 1660 ib.ptr[ib.length_dw++] = 1661 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1662 1663 /* write CS partial flush packet */ 1664 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1665 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1666 1667 /* SGPR2 */ 1668 /* write the register state for the compute dispatch */ 1669 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) { 1670 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1671 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START; 1672 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1]; 1673 } 1674 /* write the 
shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}

static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
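		/* Polaris pulls the shader engine/CU topology from the
		 * vbios via amdgpu_atombios_get_gfx_info() rather than
		 * hardcoding it per ASIC */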
1771 ret = amdgpu_atombios_get_gfx_info(adev); 1772 if (ret) 1773 return ret; 1774 adev->gfx.config.max_gprs = 256; 1775 adev->gfx.config.max_gs_threads = 32; 1776 adev->gfx.config.max_hw_contexts = 8; 1777 1778 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1779 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1780 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1781 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1782 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; 1783 break; 1784 case CHIP_POLARIS10: 1785 ret = amdgpu_atombios_get_gfx_info(adev); 1786 if (ret) 1787 return ret; 1788 adev->gfx.config.max_gprs = 256; 1789 adev->gfx.config.max_gs_threads = 32; 1790 adev->gfx.config.max_hw_contexts = 8; 1791 1792 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1793 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1794 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1795 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1796 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1797 break; 1798 case CHIP_TONGA: 1799 adev->gfx.config.max_shader_engines = 4; 1800 adev->gfx.config.max_tile_pipes = 8; 1801 adev->gfx.config.max_cu_per_sh = 8; 1802 adev->gfx.config.max_sh_per_se = 1; 1803 adev->gfx.config.max_backends_per_se = 2; 1804 adev->gfx.config.max_texture_channel_caches = 8; 1805 adev->gfx.config.max_gprs = 256; 1806 adev->gfx.config.max_gs_threads = 32; 1807 adev->gfx.config.max_hw_contexts = 8; 1808 1809 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1810 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1811 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1812 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1813 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1814 break; 1815 case CHIP_CARRIZO: 1816 adev->gfx.config.max_shader_engines = 1; 1817 adev->gfx.config.max_tile_pipes = 2; 1818 adev->gfx.config.max_sh_per_se = 1; 1819 adev->gfx.config.max_backends_per_se = 2; 1820 1821 switch (adev->pdev->revision) { 1822 case 0xc4: 1823 case 0x84: 1824 case 0xc8: 1825 case 0xcc: 1826 case 0xe1: 1827 case 0xe3: 1828 /* B10 */ 1829 adev->gfx.config.max_cu_per_sh = 8; 1830 break; 1831 case 0xc5: 1832 case 0x81: 1833 case 0x85: 1834 case 0xc9: 1835 case 0xcd: 1836 case 0xe2: 1837 case 0xe4: 1838 /* B8 */ 1839 adev->gfx.config.max_cu_per_sh = 6; 1840 break; 1841 case 0xc6: 1842 case 0xca: 1843 case 0xce: 1844 case 0x88: 1845 /* B6 */ 1846 adev->gfx.config.max_cu_per_sh = 6; 1847 break; 1848 case 0xc7: 1849 case 0x87: 1850 case 0xcb: 1851 case 0xe5: 1852 case 0x89: 1853 default: 1854 /* B4 */ 1855 adev->gfx.config.max_cu_per_sh = 4; 1856 break; 1857 } 1858 1859 adev->gfx.config.max_texture_channel_caches = 2; 1860 adev->gfx.config.max_gprs = 256; 1861 adev->gfx.config.max_gs_threads = 32; 1862 adev->gfx.config.max_hw_contexts = 8; 1863 1864 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1865 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1866 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1867 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1868 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1869 break; 1870 case CHIP_STONEY: 1871 adev->gfx.config.max_shader_engines = 1; 1872 adev->gfx.config.max_tile_pipes = 2; 1873 adev->gfx.config.max_sh_per_se = 1; 1874 adev->gfx.config.max_backends_per_se = 1; 1875 1876 switch (adev->pdev->revision) { 1877 case 0xc0: 1878 case 0xc1: 1879 case 0xc2: 1880 case 0xc4: 1881 case 0xc8: 1882 case 0xc9: 1883 adev->gfx.config.max_cu_per_sh = 3; 1884 break; 1885 case 0xd0: 1886 case 0xd1: 1887 case 0xd2: 1888 default: 1889 
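			/* unknown revision: assume the smallest (2 CU) part */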
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one.
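		 * (an addr map value of 11 is the 8 GB case tested below)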
*/ 1951 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) 1952 adev->gfx.config.mem_row_size_in_kb = 2; 1953 else 1954 adev->gfx.config.mem_row_size_in_kb = 1; 1955 } else { 1956 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS); 1957 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 1958 if (adev->gfx.config.mem_row_size_in_kb > 4) 1959 adev->gfx.config.mem_row_size_in_kb = 4; 1960 } 1961 1962 adev->gfx.config.shader_engine_tile_size = 32; 1963 adev->gfx.config.num_gpus = 1; 1964 adev->gfx.config.multi_gpu_tile_size = 64; 1965 1966 /* fix up row size */ 1967 switch (adev->gfx.config.mem_row_size_in_kb) { 1968 case 1: 1969 default: 1970 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0); 1971 break; 1972 case 2: 1973 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1); 1974 break; 1975 case 4: 1976 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2); 1977 break; 1978 } 1979 adev->gfx.config.gb_addr_config = gb_addr_config; 1980 1981 return 0; 1982 } 1983 1984 static int gfx_v8_0_sw_init(void *handle) 1985 { 1986 int i, r; 1987 struct amdgpu_ring *ring; 1988 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1989 1990 /* EOP Event */ 1991 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq); 1992 if (r) 1993 return r; 1994 1995 /* Privileged reg */ 1996 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq); 1997 if (r) 1998 return r; 1999 2000 /* Privileged inst */ 2001 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq); 2002 if (r) 2003 return r; 2004 2005 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2006 2007 gfx_v8_0_scratch_init(adev); 2008 2009 r = gfx_v8_0_init_microcode(adev); 2010 if (r) { 2011 DRM_ERROR("Failed to load gfx firmware!\n"); 2012 return r; 2013 } 2014 2015 r = gfx_v8_0_rlc_init(adev); 2016 if (r) { 2017 DRM_ERROR("Failed to init rlc BOs!\n"); 2018 return r; 2019 } 2020 2021 r = gfx_v8_0_mec_init(adev); 2022 if (r) { 2023 DRM_ERROR("Failed to init MEC BOs!\n"); 2024 return r; 2025 } 2026 2027 /* set up the gfx ring */ 2028 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2029 ring = &adev->gfx.gfx_ring[i]; 2030 ring->ring_obj = NULL; 2031 sprintf(ring->name, "gfx"); 2032 /* no gfx doorbells on iceland */ 2033 if (adev->asic_type != CHIP_TOPAZ) { 2034 ring->use_doorbell = true; 2035 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; 2036 } 2037 2038 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2039 AMDGPU_CP_IRQ_GFX_EOP); 2040 if (r) 2041 return r; 2042 } 2043 2044 /* set up the compute queues */ 2045 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2046 unsigned irq_type; 2047 2048 /* max 32 queues per MEC */ 2049 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { 2050 DRM_ERROR("Too many (%d) compute rings!\n", i); 2051 break; 2052 } 2053 ring = &adev->gfx.compute_ring[i]; 2054 ring->ring_obj = NULL; 2055 ring->use_doorbell = true; 2056 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; 2057 ring->me = 1; /* first MEC */ 2058 ring->pipe = i / 8; 2059 ring->queue = i % 8; 2060 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2061 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; 2062 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2063 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2064 irq_type); 2065 if (r) 2066 return r; 2067 } 2068 2069 /* reserve GDS, GWS and OA resource for gfx */ 2070 r = 
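	/* GDS (global data share), GWS (global wave sync) and OA
	 * (ordered append) are small on-chip resources; reserve the
	 * gfx partition of each */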
amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, 2071 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, 2072 &adev->gds.gds_gfx_bo, NULL, NULL); 2073 if (r) 2074 return r; 2075 2076 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, 2077 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, 2078 &adev->gds.gws_gfx_bo, NULL, NULL); 2079 if (r) 2080 return r; 2081 2082 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, 2083 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, 2084 &adev->gds.oa_gfx_bo, NULL, NULL); 2085 if (r) 2086 return r; 2087 2088 adev->gfx.ce_ram_size = 0x8000; 2089 2090 r = gfx_v8_0_gpu_early_init(adev); 2091 if (r) 2092 return r; 2093 2094 return 0; 2095 } 2096 2097 static int gfx_v8_0_sw_fini(void *handle) 2098 { 2099 int i; 2100 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2101 2102 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); 2103 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); 2104 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); 2105 2106 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2107 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2108 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2109 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2110 2111 gfx_v8_0_mec_fini(adev); 2112 gfx_v8_0_rlc_fini(adev); 2113 gfx_v8_0_free_microcode(adev); 2114 2115 return 0; 2116 } 2117 2118 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) 2119 { 2120 uint32_t *modearray, *mod2array; 2121 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); 2122 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 2123 u32 reg_offset; 2124 2125 modearray = adev->gfx.config.tile_mode_array; 2126 mod2array = adev->gfx.config.macrotile_mode_array; 2127 2128 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2129 modearray[reg_offset] = 0; 2130 2131 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2132 mod2array[reg_offset] = 0; 2133 2134 switch (adev->asic_type) { 2135 case CHIP_TOPAZ: 2136 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2137 PIPE_CONFIG(ADDR_SURF_P2) | 2138 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2139 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2140 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2141 PIPE_CONFIG(ADDR_SURF_P2) | 2142 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2143 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2144 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2145 PIPE_CONFIG(ADDR_SURF_P2) | 2146 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2147 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2148 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2149 PIPE_CONFIG(ADDR_SURF_P2) | 2150 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2151 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2152 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2153 PIPE_CONFIG(ADDR_SURF_P2) | 2154 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2155 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2156 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2157 PIPE_CONFIG(ADDR_SURF_P2) | 2158 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2159 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2160 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2161 PIPE_CONFIG(ADDR_SURF_P2) | 2162 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2163 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2164 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2165 PIPE_CONFIG(ADDR_SURF_P2)); 2166 modearray[9] = 
(ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2167 PIPE_CONFIG(ADDR_SURF_P2) | 2168 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2169 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2170 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2171 PIPE_CONFIG(ADDR_SURF_P2) | 2172 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2173 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2174 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2175 PIPE_CONFIG(ADDR_SURF_P2) | 2176 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2177 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2178 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2179 PIPE_CONFIG(ADDR_SURF_P2) | 2180 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2181 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2182 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2183 PIPE_CONFIG(ADDR_SURF_P2) | 2184 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2185 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2186 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2187 PIPE_CONFIG(ADDR_SURF_P2) | 2188 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2189 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2190 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2191 PIPE_CONFIG(ADDR_SURF_P2) | 2192 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2193 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2194 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2195 PIPE_CONFIG(ADDR_SURF_P2) | 2196 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2197 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2198 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2199 PIPE_CONFIG(ADDR_SURF_P2) | 2200 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2201 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2202 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2203 PIPE_CONFIG(ADDR_SURF_P2) | 2204 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2205 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2206 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2207 PIPE_CONFIG(ADDR_SURF_P2) | 2208 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2209 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2210 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2211 PIPE_CONFIG(ADDR_SURF_P2) | 2212 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2213 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2214 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2215 PIPE_CONFIG(ADDR_SURF_P2) | 2216 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2217 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2218 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2219 PIPE_CONFIG(ADDR_SURF_P2) | 2220 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2221 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2222 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2223 PIPE_CONFIG(ADDR_SURF_P2) | 2224 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2225 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2226 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2227 PIPE_CONFIG(ADDR_SURF_P2) | 2228 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2229 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2230 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2231 PIPE_CONFIG(ADDR_SURF_P2) | 2232 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2233 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2234 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2235 PIPE_CONFIG(ADDR_SURF_P2) | 2236 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2237 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2238 2239 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2240 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2241 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2242 NUM_BANKS(ADDR_SURF_8_BANK)); 2243 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2244 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2245 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2246 NUM_BANKS(ADDR_SURF_8_BANK)); 2247 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2248 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2249 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2250 NUM_BANKS(ADDR_SURF_8_BANK)); 2251 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2252 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2253 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2254 NUM_BANKS(ADDR_SURF_8_BANK)); 2255 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2256 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2257 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2258 NUM_BANKS(ADDR_SURF_8_BANK)); 2259 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2260 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2261 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2262 NUM_BANKS(ADDR_SURF_8_BANK)); 2263 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2264 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2265 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2266 NUM_BANKS(ADDR_SURF_8_BANK)); 2267 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2268 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2269 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2270 NUM_BANKS(ADDR_SURF_16_BANK)); 2271 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2272 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2273 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2274 NUM_BANKS(ADDR_SURF_16_BANK)); 2275 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2276 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2277 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2278 NUM_BANKS(ADDR_SURF_16_BANK)); 2279 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2280 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2281 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2282 NUM_BANKS(ADDR_SURF_16_BANK)); 2283 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2284 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2285 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2286 NUM_BANKS(ADDR_SURF_16_BANK)); 2287 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2288 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2289 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2290 NUM_BANKS(ADDR_SURF_16_BANK)); 2291 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2294 NUM_BANKS(ADDR_SURF_8_BANK)); 2295 2296 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2297 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2298 reg_offset != 23) 2299 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2300 2301 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2302 if (reg_offset != 7) 2303 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2304 2305 break; 2306 case CHIP_FIJI: 2307 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2308 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2309 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2310 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2311 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2312 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2313 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2314 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2315 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2316 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 
2317 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2318 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2319 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2320 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2321 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2322 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2323 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2324 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2325 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2326 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2327 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2328 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2329 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2330 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2331 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2332 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2333 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2334 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2335 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2336 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2337 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2338 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2339 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2340 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2341 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2342 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2343 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2345 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2346 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2347 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2349 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2350 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2351 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2353 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2354 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2355 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2357 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2358 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2359 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2361 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2362 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2363 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2365 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2366 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2367 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2369 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2370 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2371 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2373 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2374 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2375 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2376 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2377 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2378 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2379 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2380 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2381 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2382 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2383 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2385 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2386 
PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2387 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2388 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2389 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2390 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2391 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2392 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2393 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2394 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2395 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2396 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2397 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2398 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2399 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2400 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2401 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2402 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2403 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2404 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2405 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2406 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2407 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2408 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2409 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2410 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2411 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2412 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2413 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2414 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2415 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2416 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2417 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2418 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2419 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2420 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2421 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2422 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2423 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2424 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2425 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2426 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2427 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2428 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2429 2430 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2431 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2432 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2433 NUM_BANKS(ADDR_SURF_8_BANK)); 2434 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2435 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2436 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2437 NUM_BANKS(ADDR_SURF_8_BANK)); 2438 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2439 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2440 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2441 NUM_BANKS(ADDR_SURF_8_BANK)); 2442 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2443 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2444 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2445 NUM_BANKS(ADDR_SURF_8_BANK)); 2446 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2449 NUM_BANKS(ADDR_SURF_8_BANK)); 2450 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2451 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2452 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2453 NUM_BANKS(ADDR_SURF_8_BANK)); 2454 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2455 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2456 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2457 NUM_BANKS(ADDR_SURF_8_BANK)); 2458 mod2array[8] = 
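		/* index 7 is reserved; the write loop below skips
		 * reg_offset 7, hence the jump from [6] to [8] */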
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2459 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2460 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2461 NUM_BANKS(ADDR_SURF_8_BANK)); 2462 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2463 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2464 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2465 NUM_BANKS(ADDR_SURF_8_BANK)); 2466 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2467 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2468 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2469 NUM_BANKS(ADDR_SURF_8_BANK)); 2470 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2471 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2472 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2473 NUM_BANKS(ADDR_SURF_8_BANK)); 2474 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2475 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2476 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2477 NUM_BANKS(ADDR_SURF_8_BANK)); 2478 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2481 NUM_BANKS(ADDR_SURF_8_BANK)); 2482 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2483 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2484 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2485 NUM_BANKS(ADDR_SURF_4_BANK)); 2486 2487 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2488 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2489 2490 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2491 if (reg_offset != 7) 2492 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2493 2494 break; 2495 case CHIP_TONGA: 2496 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2497 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2498 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2499 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2500 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2501 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2502 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2503 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2504 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2505 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2506 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2507 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2508 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2509 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2510 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2511 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2512 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2513 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2514 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2515 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2516 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2517 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2518 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2519 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2520 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2521 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2522 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2523 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2524 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2525 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2526 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2527 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2528 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2529 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2530 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2531 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2532 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2533 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2534 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2535 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2536 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2537 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2538 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2539 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2540 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2541 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2542 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2543 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2544 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2545 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2546 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2547 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2548 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2549 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2550 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2551 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2552 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2553 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2554 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2555 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2556 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2557 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2558 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2559 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2560 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2561 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2562 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2563 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2564 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2565 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2566 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2567 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2568 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2569 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2570 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2571 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2572 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2573 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2574 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2575 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2576 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2577 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2578 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2579 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2580 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2581 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2582 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2583 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2584 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2585 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2586 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2587 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2588 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2589 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2590 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2591 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2592 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2593 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2594 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2595 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2596 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2597 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2598 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2599 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2600 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2601 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2602 modearray[27] = 
(ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2603 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2604 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2605 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2606 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2607 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2608 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2609 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2610 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2611 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2612 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2613 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2614 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2615 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2616 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2617 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2618 2619 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2620 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2621 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2622 NUM_BANKS(ADDR_SURF_16_BANK)); 2623 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2624 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2625 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2626 NUM_BANKS(ADDR_SURF_16_BANK)); 2627 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2628 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2629 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2630 NUM_BANKS(ADDR_SURF_16_BANK)); 2631 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2632 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2633 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2634 NUM_BANKS(ADDR_SURF_16_BANK)); 2635 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2636 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2637 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2638 NUM_BANKS(ADDR_SURF_16_BANK)); 2639 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2640 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2641 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2642 NUM_BANKS(ADDR_SURF_16_BANK)); 2643 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2644 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2645 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2646 NUM_BANKS(ADDR_SURF_16_BANK)); 2647 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2648 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2649 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2650 NUM_BANKS(ADDR_SURF_16_BANK)); 2651 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2652 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2653 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2654 NUM_BANKS(ADDR_SURF_16_BANK)); 2655 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2656 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2657 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2658 NUM_BANKS(ADDR_SURF_16_BANK)); 2659 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2660 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2661 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2662 NUM_BANKS(ADDR_SURF_16_BANK)); 2663 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2664 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2665 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2666 NUM_BANKS(ADDR_SURF_8_BANK)); 2667 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2668 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2669 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2670 NUM_BANKS(ADDR_SURF_4_BANK)); 2671 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2672 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2673 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2674 NUM_BANKS(ADDR_SURF_4_BANK)); 2675 2676 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2677 
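			/* unlike Topaz, every tile mode slot is written here */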
WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2678 2679 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2680 if (reg_offset != 7) 2681 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2682 2683 break; 2684 case CHIP_POLARIS11: 2685 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2686 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2687 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2688 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2689 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2690 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2691 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2692 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2693 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2694 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2695 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2696 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2697 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2698 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2699 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2700 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2701 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2702 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2703 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2704 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2705 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2706 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2707 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2708 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2709 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2710 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2711 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2712 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2713 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2714 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2715 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2716 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2717 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2718 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2719 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2720 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2721 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2722 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2723 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2724 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2725 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2726 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2727 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2728 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2729 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2730 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2731 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2732 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2733 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2734 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2735 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2736 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2737 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2738 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2739 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2740 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2741 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2742 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2743 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2744 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2745 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2746 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2747 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2748 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2749 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2750 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2751 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2752 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2753 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2754 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2755 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2756 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2757 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2758 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2759 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2760 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2761 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2762 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2763 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2764 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2765 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2766 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2767 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2768 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2769 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2770 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2771 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2772 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2773 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2774 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2775 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2776 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2777 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2778 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2779 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2780 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2781 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2782 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2783 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2784 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2785 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2786 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2787 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2788 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2789 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2790 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2791 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2792 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2793 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2794 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2795 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2796 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2797 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2798 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2799 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2800 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2801 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2802 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2803 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2804 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2805 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2806 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2807 2808 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2809 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2810 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2811 NUM_BANKS(ADDR_SURF_16_BANK)); 2812 2813 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2814 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2815 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2816 NUM_BANKS(ADDR_SURF_16_BANK)); 2817 2818 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2819 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2820 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2821 NUM_BANKS(ADDR_SURF_16_BANK)); 2822 2823 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2824 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2825 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2826 NUM_BANKS(ADDR_SURF_16_BANK)); 2827 2828 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2829 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2830 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2831 NUM_BANKS(ADDR_SURF_16_BANK)); 2832 2833 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2834 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2835 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2836 NUM_BANKS(ADDR_SURF_16_BANK)); 2837 2838 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2839 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2840 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2841 NUM_BANKS(ADDR_SURF_16_BANK)); 2842 2843 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2844 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2845 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2846 NUM_BANKS(ADDR_SURF_16_BANK)); 2847 2848 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2849 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2850 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2851 NUM_BANKS(ADDR_SURF_16_BANK)); 2852 2853 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2854 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2855 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2856 NUM_BANKS(ADDR_SURF_16_BANK)); 2857 2858 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2859 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2860 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2861 NUM_BANKS(ADDR_SURF_16_BANK)); 2862 2863 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2864 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2865 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2866 NUM_BANKS(ADDR_SURF_16_BANK)); 2867 2868 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2869 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2870 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2871 NUM_BANKS(ADDR_SURF_8_BANK)); 2872 2873 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2874 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2875 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2876 NUM_BANKS(ADDR_SURF_4_BANK)); 2877 2878 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2879 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2880 2881 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2882 if (reg_offset != 7) 2883 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2884 2885 break; 2886 case CHIP_POLARIS10: 2887 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2888 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2889 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2890 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2891 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2892 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2893 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2894 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2895 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2896 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2897 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2898 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2899 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2900 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2901 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2902 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2903 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2904 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2905 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2906 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2907 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2908 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2909 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2910 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2911 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2912 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2913 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2914 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2915 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2916 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2917 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2918 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2919 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2920 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2921 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2922 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2923 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2924 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2925 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2926 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2927 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2928 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2929 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2930 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2931 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2932 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2933 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2934 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2935 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2936 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2937 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2938 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2939 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2940 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2941 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2942 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2943 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2944 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2945 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2946 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2947 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2948 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2949 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2950 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2951 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2952 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2953 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2954 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2955 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2956 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2957 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2958 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2959 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2960 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2961 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2962 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2963 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2964 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2965 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2966 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2967 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2968 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2969 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2970 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2971 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2972 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2973 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2974 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2975 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2976 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2977 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2978 
PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2979 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2980 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2981 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2982 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2983 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2984 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2985 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2986 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2987 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2988 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2989 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2990 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2991 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2992 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2993 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2994 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2995 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2996 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2997 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2998 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2999 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3000 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3001 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3002 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3003 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3004 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3005 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3006 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3007 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3008 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3009 3010 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3011 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3012 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3013 NUM_BANKS(ADDR_SURF_16_BANK)); 3014 3015 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3016 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3017 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3018 NUM_BANKS(ADDR_SURF_16_BANK)); 3019 3020 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3021 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3022 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3023 NUM_BANKS(ADDR_SURF_16_BANK)); 3024 3025 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3026 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3027 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3028 NUM_BANKS(ADDR_SURF_16_BANK)); 3029 3030 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3031 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3032 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3033 NUM_BANKS(ADDR_SURF_16_BANK)); 3034 3035 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3036 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3037 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3038 NUM_BANKS(ADDR_SURF_16_BANK)); 3039 3040 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3041 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3042 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3043 NUM_BANKS(ADDR_SURF_16_BANK)); 3044 3045 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3046 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3047 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3048 NUM_BANKS(ADDR_SURF_16_BANK)); 3049 3050 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3051 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3052 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3053 NUM_BANKS(ADDR_SURF_16_BANK)); 3054 3055 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3056 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3057 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3058 NUM_BANKS(ADDR_SURF_16_BANK)); 3059 3060 mod2array[11] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3061 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3062 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3063 NUM_BANKS(ADDR_SURF_16_BANK)); 3064 3065 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3066 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3067 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3068 NUM_BANKS(ADDR_SURF_8_BANK)); 3069 3070 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3071 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3072 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3073 NUM_BANKS(ADDR_SURF_4_BANK)); 3074 3075 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3076 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3077 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3078 NUM_BANKS(ADDR_SURF_4_BANK)); 3079 3080 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3081 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3082 3083 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3084 if (reg_offset != 7) 3085 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3086 3087 break; 3088 case CHIP_STONEY: 3089 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3090 PIPE_CONFIG(ADDR_SURF_P2) | 3091 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3092 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3093 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3094 PIPE_CONFIG(ADDR_SURF_P2) | 3095 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3096 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3097 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3098 PIPE_CONFIG(ADDR_SURF_P2) | 3099 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3100 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3101 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3102 PIPE_CONFIG(ADDR_SURF_P2) | 3103 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3104 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3105 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3106 PIPE_CONFIG(ADDR_SURF_P2) | 3107 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3108 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3109 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3110 PIPE_CONFIG(ADDR_SURF_P2) | 3111 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3112 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3113 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3114 PIPE_CONFIG(ADDR_SURF_P2) | 3115 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3116 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3117 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3118 PIPE_CONFIG(ADDR_SURF_P2)); 3119 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3120 PIPE_CONFIG(ADDR_SURF_P2) | 3121 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3122 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3123 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3124 PIPE_CONFIG(ADDR_SURF_P2) | 3125 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3126 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3127 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3128 PIPE_CONFIG(ADDR_SURF_P2) | 3129 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3130 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3131 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3132 PIPE_CONFIG(ADDR_SURF_P2) | 3133 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3134 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3135 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3136 PIPE_CONFIG(ADDR_SURF_P2) | 3137 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3138 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3139 modearray[15] = 
(ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3140 PIPE_CONFIG(ADDR_SURF_P2) | 3141 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3142 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3143 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3144 PIPE_CONFIG(ADDR_SURF_P2) | 3145 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3146 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3147 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3148 PIPE_CONFIG(ADDR_SURF_P2) | 3149 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3150 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3151 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3152 PIPE_CONFIG(ADDR_SURF_P2) | 3153 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3154 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3155 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3156 PIPE_CONFIG(ADDR_SURF_P2) | 3157 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3158 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3159 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3160 PIPE_CONFIG(ADDR_SURF_P2) | 3161 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3162 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3163 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3164 PIPE_CONFIG(ADDR_SURF_P2) | 3165 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3166 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3167 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3168 PIPE_CONFIG(ADDR_SURF_P2) | 3169 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3170 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3171 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3172 PIPE_CONFIG(ADDR_SURF_P2) | 3173 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3174 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3175 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3176 PIPE_CONFIG(ADDR_SURF_P2) | 3177 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3178 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3179 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3180 PIPE_CONFIG(ADDR_SURF_P2) | 3181 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3182 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3183 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3184 PIPE_CONFIG(ADDR_SURF_P2) | 3185 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3186 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3187 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3188 PIPE_CONFIG(ADDR_SURF_P2) | 3189 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3190 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3191 3192 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3193 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3194 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3195 NUM_BANKS(ADDR_SURF_8_BANK)); 3196 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3197 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3198 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3199 NUM_BANKS(ADDR_SURF_8_BANK)); 3200 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3201 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3202 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3203 NUM_BANKS(ADDR_SURF_8_BANK)); 3204 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3205 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3206 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3207 NUM_BANKS(ADDR_SURF_8_BANK)); 3208 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3209 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3210 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3211 NUM_BANKS(ADDR_SURF_8_BANK)); 3212 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3213 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3214 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3215 NUM_BANKS(ADDR_SURF_8_BANK)); 3216 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3217 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3218 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3219 NUM_BANKS(ADDR_SURF_8_BANK)); 3220 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3221 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3222 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3223 NUM_BANKS(ADDR_SURF_16_BANK)); 3224 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3225 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3226 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3227 NUM_BANKS(ADDR_SURF_16_BANK)); 3228 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3229 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3230 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3231 NUM_BANKS(ADDR_SURF_16_BANK)); 3232 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3233 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3234 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3235 NUM_BANKS(ADDR_SURF_16_BANK)); 3236 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3237 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3238 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3239 NUM_BANKS(ADDR_SURF_16_BANK)); 3240 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3241 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3242 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3243 NUM_BANKS(ADDR_SURF_16_BANK)); 3244 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3245 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3246 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3247 NUM_BANKS(ADDR_SURF_8_BANK)); 3248 3249 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3250 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3251 reg_offset != 23) 3252 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3253 3254 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3255 if (reg_offset != 7) 3256 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3257 3258 break; 3259 default: 3260 dev_warn(adev->dev, 3261 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3262 adev->asic_type); 3263 3264 case CHIP_CARRIZO: 3265 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3266 PIPE_CONFIG(ADDR_SURF_P2) | 3267 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3268 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3269 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3270 PIPE_CONFIG(ADDR_SURF_P2) | 3271 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3272 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3273 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3274 PIPE_CONFIG(ADDR_SURF_P2) | 3275 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3276 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3277 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3278 PIPE_CONFIG(ADDR_SURF_P2) | 3279 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3280 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3281 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3282 PIPE_CONFIG(ADDR_SURF_P2) | 3283 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3284 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3285 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3286 PIPE_CONFIG(ADDR_SURF_P2) | 3287 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3288 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3289 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3290 PIPE_CONFIG(ADDR_SURF_P2) | 3291 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3292 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3293 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3294 PIPE_CONFIG(ADDR_SURF_P2)); 3295 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3296 PIPE_CONFIG(ADDR_SURF_P2) | 3297 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3299 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3300 PIPE_CONFIG(ADDR_SURF_P2) | 3301 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3303 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3304 PIPE_CONFIG(ADDR_SURF_P2) | 3305 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3307 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3308 PIPE_CONFIG(ADDR_SURF_P2) | 3309 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3311 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3312 PIPE_CONFIG(ADDR_SURF_P2) | 3313 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3315 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3316 PIPE_CONFIG(ADDR_SURF_P2) | 3317 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3319 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3320 PIPE_CONFIG(ADDR_SURF_P2) | 3321 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3323 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3324 PIPE_CONFIG(ADDR_SURF_P2) | 3325 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3326 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3327 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3328 PIPE_CONFIG(ADDR_SURF_P2) | 3329 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3330 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3331 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3332 PIPE_CONFIG(ADDR_SURF_P2) | 3333 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3335 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3336 PIPE_CONFIG(ADDR_SURF_P2) | 3337 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3338 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3339 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3340 PIPE_CONFIG(ADDR_SURF_P2) | 3341 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3342 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3343 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3344 PIPE_CONFIG(ADDR_SURF_P2) | 3345 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3347 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3348 PIPE_CONFIG(ADDR_SURF_P2) | 3349 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3350 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3351 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3352 PIPE_CONFIG(ADDR_SURF_P2) | 3353 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3354 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3355 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3356 PIPE_CONFIG(ADDR_SURF_P2) | 3357 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3359 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3360 PIPE_CONFIG(ADDR_SURF_P2) | 3361 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3363 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3364 PIPE_CONFIG(ADDR_SURF_P2) | 3365 
MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3367 3368 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3371 NUM_BANKS(ADDR_SURF_8_BANK)); 3372 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3373 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3374 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3375 NUM_BANKS(ADDR_SURF_8_BANK)); 3376 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3377 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3378 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3379 NUM_BANKS(ADDR_SURF_8_BANK)); 3380 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3381 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3382 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3383 NUM_BANKS(ADDR_SURF_8_BANK)); 3384 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3385 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3386 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3387 NUM_BANKS(ADDR_SURF_8_BANK)); 3388 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3389 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3390 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3391 NUM_BANKS(ADDR_SURF_8_BANK)); 3392 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3393 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3394 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3395 NUM_BANKS(ADDR_SURF_8_BANK)); 3396 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3397 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3398 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3399 NUM_BANKS(ADDR_SURF_16_BANK)); 3400 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3401 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3402 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3403 NUM_BANKS(ADDR_SURF_16_BANK)); 3404 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3405 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3406 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3407 NUM_BANKS(ADDR_SURF_16_BANK)); 3408 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3409 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3410 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3411 NUM_BANKS(ADDR_SURF_16_BANK)); 3412 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3413 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3414 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3415 NUM_BANKS(ADDR_SURF_16_BANK)); 3416 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3419 NUM_BANKS(ADDR_SURF_16_BANK)); 3420 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3421 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3422 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3423 NUM_BANKS(ADDR_SURF_8_BANK)); 3424 3425 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3426 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3427 reg_offset != 23) 3428 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3429 3430 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3431 if (reg_offset != 7) 3432 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3433 3434 break; 3435 } 3436 } 3437 3438 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, 3439 u32 se_num, u32 sh_num, u32 instance) 3440 { 3441 u32 data; 3442 3443 if (instance == 0xffffffff) 3444 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3445 else 3446 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 3447 3448 if 
(se_num == 0xffffffff) 3449 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3450 else 3451 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3452 3453 if (sh_num == 0xffffffff) 3454 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3455 else 3456 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3457 3458 WREG32(mmGRBM_GFX_INDEX, data); 3459 } 3460 3461 static u32 gfx_v8_0_create_bitmask(u32 bit_width) 3462 { 3463 return (u32)((1ULL << bit_width) - 1); 3464 } 3465 3466 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3467 { 3468 u32 data, mask; 3469 3470 data = RREG32(mmCC_RB_BACKEND_DISABLE) | 3471 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3472 3473 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); 3474 3475 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / 3476 adev->gfx.config.max_sh_per_se); 3477 3478 return (~data) & mask; 3479 } 3480 3481 static void 3482 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) 3483 { 3484 switch (adev->asic_type) { 3485 case CHIP_FIJI: 3486 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3487 RB_XSEL2(1) | PKR_MAP(2) | 3488 PKR_XSEL(1) | PKR_YSEL(1) | 3489 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); 3490 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | 3491 SE_PAIR_YSEL(2); 3492 break; 3493 case CHIP_TONGA: 3494 case CHIP_POLARIS10: 3495 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3496 SE_XSEL(1) | SE_YSEL(1); 3497 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | 3498 SE_PAIR_YSEL(2); 3499 break; 3500 case CHIP_TOPAZ: 3501 case CHIP_CARRIZO: 3502 *rconf |= RB_MAP_PKR0(2); 3503 *rconf1 |= 0x0; 3504 break; 3505 case CHIP_POLARIS11: 3506 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3507 SE_XSEL(1) | SE_YSEL(1); 3508 *rconf1 |= 0x0; 3509 break; 3510 case CHIP_STONEY: 3511 *rconf |= 0x0; 3512 *rconf1 |= 0x0; 3513 break; 3514 default: 3515 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 3516 break; 3517 } 3518 } 3519 3520 static void 3521 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, 3522 u32 raster_config, u32 raster_config_1, 3523 unsigned rb_mask, unsigned num_rb) 3524 { 3525 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); 3526 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); 3527 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); 3528 unsigned rb_per_se = num_rb / num_se; 3529 unsigned se_mask[4]; 3530 unsigned se; 3531 3532 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3533 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3534 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 3535 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3536 3537 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); 3538 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); 3539 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); 3540 3541 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3542 (!se_mask[2] && !se_mask[3]))) { 3543 raster_config_1 &= ~SE_PAIR_MAP_MASK; 3544 3545 if (!se_mask[0] && !se_mask[1]) { 3546 raster_config_1 |= 3547 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); 3548 } else { 3549 raster_config_1 |= 3550 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); 3551 } 3552 } 3553 3554 for (se = 0; se < num_se; se++) { 3555 unsigned raster_config_se = raster_config; 3556 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3557 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3558 int idx = (se / 2) * 2; 3559 3560 if ((num_se > 1) && 
(!se_mask[idx] || !se_mask[idx + 1])) { 3561 raster_config_se &= ~SE_MAP_MASK; 3562 3563 if (!se_mask[idx]) { 3564 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); 3565 } else { 3566 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); 3567 } 3568 } 3569 3570 pkr0_mask &= rb_mask; 3571 pkr1_mask &= rb_mask; 3572 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3573 raster_config_se &= ~PKR_MAP_MASK; 3574 3575 if (!pkr0_mask) { 3576 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); 3577 } else { 3578 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); 3579 } 3580 } 3581 3582 if (rb_per_se >= 2) { 3583 unsigned rb0_mask = 1 << (se * rb_per_se); 3584 unsigned rb1_mask = rb0_mask << 1; 3585 3586 rb0_mask &= rb_mask; 3587 rb1_mask &= rb_mask; 3588 if (!rb0_mask || !rb1_mask) { 3589 raster_config_se &= ~RB_MAP_PKR0_MASK; 3590 3591 if (!rb0_mask) { 3592 raster_config_se |= 3593 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); 3594 } else { 3595 raster_config_se |= 3596 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); 3597 } 3598 } 3599 3600 if (rb_per_se > 2) { 3601 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3602 rb1_mask = rb0_mask << 1; 3603 rb0_mask &= rb_mask; 3604 rb1_mask &= rb_mask; 3605 if (!rb0_mask || !rb1_mask) { 3606 raster_config_se &= ~RB_MAP_PKR1_MASK; 3607 3608 if (!rb0_mask) { 3609 raster_config_se |= 3610 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); 3611 } else { 3612 raster_config_se |= 3613 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0); 3614 } 3615 } 3616 } 3617 } 3618 3619 /* GRBM_GFX_INDEX has a different offset on VI */ 3620 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); 3621 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); 3622 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3623 } 3624 3625 /* GRBM_GFX_INDEX has a different offset on VI */ 3626 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3627 } 3628 3629 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) 3630 { 3631 int i, j; 3632 u32 data; 3633 u32 raster_config = 0, raster_config_1 = 0; 3634 u32 active_rbs = 0; 3635 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 3636 adev->gfx.config.max_sh_per_se; 3637 unsigned num_rb_pipes; 3638 3639 mutex_lock(&adev->grbm_idx_mutex); 3640 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3641 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3642 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3643 data = gfx_v8_0_get_rb_active_bitmap(adev); 3644 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 3645 rb_bitmap_width_per_sh); 3646 } 3647 } 3648 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3649 3650 adev->gfx.config.backend_enable_mask = active_rbs; 3651 adev->gfx.config.num_rbs = hweight32(active_rbs); 3652 3653 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se * 3654 adev->gfx.config.max_shader_engines, 16); 3655 3656 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1); 3657 3658 if (!adev->gfx.config.backend_enable_mask || 3659 adev->gfx.config.num_rbs >= num_rb_pipes) { 3660 WREG32(mmPA_SC_RASTER_CONFIG, raster_config); 3661 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3662 } else { 3663 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1, 3664 adev->gfx.config.backend_enable_mask, 3665 num_rb_pipes); 3666 } 3667 3668 /* cache the values for userspace */ 3669 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3670 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3671 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3672 
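			/*
			 * With GRBM_GFX_INDEX pointed at this (se, sh) pair by the
			 * select above, snapshot the raw harvest and raster-config
			 * registers so userspace can query the per-SE/SH setup.
			 */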
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM registers of the VMIDs reserved for compute
 * (FIRST_COMPUTE_VMID up to, but not including, LAST_COMPUTE_VMID).
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:		0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:	0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:	0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
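		/*
		 * Note: programming the APE1 base above its limit (1 > 0)
		 * leaves the APE1 aperture empty, which effectively disables
		 * it for these VMIDs.
		 */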
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcast
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
	       RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
	       RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
	       RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
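/*
 * Toggle the CP ring-0 status interrupts (context busy/empty, compute
 * busy, gfx idle) as a group; callers use this around RLC/CP bring-up.
 */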
static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}

static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
	       adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
	       adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
	       adev->gfx.rlc.clear_state_size);
}

static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}

static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
	       adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				    RLC_FormatDirectRegListLength,
				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				    unique_indices,
				    &indices_count,
				    ARRAY_SIZE(unique_indices),
				    indirect_start_offsets,
				    &offset_count,
				    ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
	       adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
		       indirect_start_offsets[i]);
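	/*
	 * Each nonzero unique index is then split across an
	 * RLC_SRM_INDEX_CNTL_ADDR_n / _DATA_n register pair: the low
	 * 18 bits of the entry go to the ADDR register, the bits above
	 * 20 to the matching DATA register.
	 */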
	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			amdgpu_mm_wreg(adev, temp + i,
				       unique_indices[i] & 0x3FFFF, false);
			amdgpu_mm_wreg(adev, data + i,
				       unique_indices[i] >> 20, false);
		}
	}
	kfree(register_list_format);

	return 0;
}

static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}

static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
}

static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}

static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);
	} else if (adev->asic_type == CHIP_POLARIS11) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}
}

static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* on APUs like Carrizo, the CP interrupt is only enabled after the CP is initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
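/*
 * The RLC microcode is streamed one dword at a time through the
 * RLC_GPM_UCODE_ADDR/DATA pair, with the address register first reset
 * to 0; the final ADDR write records the firmware version, the same
 * convention the CP PFP/CE/ME loaders below follow.
 */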
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}

static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}

static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
4175 (adev->gfx.pfp_fw->data + 4176 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 4177 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 4178 WREG32(mmCP_PFP_UCODE_ADDR, 0); 4179 for (i = 0; i < fw_size; i++) 4180 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 4181 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 4182 4183 /* CE */ 4184 fw_data = (const __le32 *) 4185 (adev->gfx.ce_fw->data + 4186 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 4187 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 4188 WREG32(mmCP_CE_UCODE_ADDR, 0); 4189 for (i = 0; i < fw_size; i++) 4190 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 4191 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 4192 4193 /* ME */ 4194 fw_data = (const __le32 *) 4195 (adev->gfx.me_fw->data + 4196 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 4197 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 4198 WREG32(mmCP_ME_RAM_WADDR, 0); 4199 for (i = 0; i < fw_size; i++) 4200 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 4201 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 4202 4203 return 0; 4204 } 4205 4206 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev) 4207 { 4208 u32 count = 0; 4209 const struct cs_section_def *sect = NULL; 4210 const struct cs_extent_def *ext = NULL; 4211 4212 /* begin clear state */ 4213 count += 2; 4214 /* context control state */ 4215 count += 3; 4216 4217 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4218 for (ext = sect->section; ext->extent != NULL; ++ext) { 4219 if (sect->id == SECT_CONTEXT) 4220 count += 2 + ext->reg_count; 4221 else 4222 return 0; 4223 } 4224 } 4225 /* pa_sc_raster_config/pa_sc_raster_config1 */ 4226 count += 4; 4227 /* end clear state */ 4228 count += 2; 4229 /* clear state */ 4230 count += 2; 4231 4232 return count; 4233 } 4234 4235 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) 4236 { 4237 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 4238 const struct cs_section_def *sect = NULL; 4239 const struct cs_extent_def *ext = NULL; 4240 int r, i; 4241 4242 /* init the CP */ 4243 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 4244 WREG32(mmCP_ENDIAN_SWAP, 0); 4245 WREG32(mmCP_DEVICE_ID, 1); 4246 4247 gfx_v8_0_cp_gfx_enable(adev, true); 4248 4249 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); 4250 if (r) { 4251 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 4252 return r; 4253 } 4254 4255 /* clear state buffer */ 4256 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4257 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 4258 4259 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4260 amdgpu_ring_write(ring, 0x80000000); 4261 amdgpu_ring_write(ring, 0x80000000); 4262 4263 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4264 for (ext = sect->section; ext->extent != NULL; ++ext) { 4265 if (sect->id == SECT_CONTEXT) { 4266 amdgpu_ring_write(ring, 4267 PACKET3(PACKET3_SET_CONTEXT_REG, 4268 ext->reg_count)); 4269 amdgpu_ring_write(ring, 4270 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 4271 for (i = 0; i < ext->reg_count; i++) 4272 amdgpu_ring_write(ring, ext->extent[i]); 4273 } 4274 } 4275 } 4276 4277 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 4278 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 4279 switch (adev->asic_type) { 4280 case CHIP_TONGA: 4281 case CHIP_POLARIS10: 4282 amdgpu_ring_write(ring, 0x16000012); 4283 
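		/* second data dword of the SET_CONTEXT_REG packet: PA_SC_RASTER_CONFIG_1 */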
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}

static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type ==
CHIP_TONGA) { 4390 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 4391 DOORBELL_RANGE_LOWER, 4392 AMDGPU_DOORBELL_GFX_RING0); 4393 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 4394 4395 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, 4396 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 4397 } 4398 4399 } 4400 4401 /* start the ring */ 4402 gfx_v8_0_cp_gfx_start(adev); 4403 ring->ready = true; 4404 r = amdgpu_ring_test_ring(ring); 4405 if (r) 4406 ring->ready = false; 4407 4408 return r; 4409 } 4410 4411 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 4412 { 4413 int i; 4414 4415 if (enable) { 4416 WREG32(mmCP_MEC_CNTL, 0); 4417 } else { 4418 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 4419 for (i = 0; i < adev->gfx.num_compute_rings; i++) 4420 adev->gfx.compute_ring[i].ready = false; 4421 } 4422 udelay(50); 4423 } 4424 4425 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 4426 { 4427 const struct gfx_firmware_header_v1_0 *mec_hdr; 4428 const __le32 *fw_data; 4429 unsigned i, fw_size; 4430 4431 if (!adev->gfx.mec_fw) 4432 return -EINVAL; 4433 4434 gfx_v8_0_cp_compute_enable(adev, false); 4435 4436 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4437 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4438 4439 fw_data = (const __le32 *) 4440 (adev->gfx.mec_fw->data + 4441 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4442 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4443 4444 /* MEC1 */ 4445 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4446 for (i = 0; i < fw_size; i++) 4447 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4448 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4449 4450 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 4451 if (adev->gfx.mec2_fw) { 4452 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4453 4454 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4455 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4456 4457 fw_data = (const __le32 *) 4458 (adev->gfx.mec2_fw->data + 4459 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4460 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4461 4462 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4463 for (i = 0; i < fw_size; i++) 4464 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4465 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4466 } 4467 4468 return 0; 4469 } 4470 4471 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev) 4472 { 4473 int i, r; 4474 4475 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4476 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4477 4478 if (ring->mqd_obj) { 4479 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4480 if (unlikely(r != 0)) 4481 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); 4482 4483 amdgpu_bo_unpin(ring->mqd_obj); 4484 amdgpu_bo_unreserve(ring->mqd_obj); 4485 4486 amdgpu_bo_unref(&ring->mqd_obj); 4487 ring->mqd_obj = NULL; 4488 } 4489 } 4490 } 4491 4492 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) 4493 { 4494 int r, i, j; 4495 u32 tmp; 4496 bool use_doorbell = true; 4497 u64 hqd_gpu_addr; 4498 u64 mqd_gpu_addr; 4499 u64 eop_gpu_addr; 4500 u64 wb_gpu_addr; 4501 u32 *buf; 4502 struct vi_mqd *mqd; 4503 4504 /* init the queues. 
*/ 4505 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4506 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4507 4508 if (ring->mqd_obj == NULL) { 4509 r = amdgpu_bo_create(adev, 4510 sizeof(struct vi_mqd), 4511 PAGE_SIZE, true, 4512 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, 4513 NULL, &ring->mqd_obj); 4514 if (r) { 4515 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); 4516 return r; 4517 } 4518 } 4519 4520 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4521 if (unlikely(r != 0)) { 4522 gfx_v8_0_cp_compute_fini(adev); 4523 return r; 4524 } 4525 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, 4526 &mqd_gpu_addr); 4527 if (r) { 4528 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); 4529 gfx_v8_0_cp_compute_fini(adev); 4530 return r; 4531 } 4532 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); 4533 if (r) { 4534 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); 4535 gfx_v8_0_cp_compute_fini(adev); 4536 return r; 4537 } 4538 4539 /* init the mqd struct */ 4540 memset(buf, 0, sizeof(struct vi_mqd)); 4541 4542 mqd = (struct vi_mqd *)buf; 4543 mqd->header = 0xC0310800; 4544 mqd->compute_pipelinestat_enable = 0x00000001; 4545 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4546 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4547 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4548 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4549 mqd->compute_misc_reserved = 0x00000003; 4550 4551 mutex_lock(&adev->srbm_mutex); 4552 vi_srbm_select(adev, ring->me, 4553 ring->pipe, 4554 ring->queue, 0); 4555 4556 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); 4557 eop_gpu_addr >>= 8; 4558 4559 /* write the EOP addr */ 4560 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr); 4561 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); 4562 4563 /* set the VMID assigned */ 4564 WREG32(mmCP_HQD_VMID, 0); 4565 4566 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4567 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4568 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4569 (order_base_2(MEC_HPD_SIZE / 4) - 1)); 4570 WREG32(mmCP_HQD_EOP_CONTROL, tmp); 4571 4572 /* disable wptr polling */ 4573 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); 4574 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4575 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); 4576 4577 mqd->cp_hqd_eop_base_addr_lo = 4578 RREG32(mmCP_HQD_EOP_BASE_ADDR); 4579 mqd->cp_hqd_eop_base_addr_hi = 4580 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI); 4581 4582 /* enable doorbell? 
*/
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr = 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
				    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
4666 (adev->asic_type == CHIP_POLARIS11) || 4667 (adev->asic_type == CHIP_POLARIS10)) { 4668 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, 4669 AMDGPU_DOORBELL_KIQ << 2); 4670 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, 4671 AMDGPU_DOORBELL_MEC_RING7 << 2); 4672 } 4673 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4674 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4675 DOORBELL_OFFSET, ring->doorbell_index); 4676 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 4677 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); 4678 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); 4679 mqd->cp_hqd_pq_doorbell_control = tmp; 4680 4681 } else { 4682 mqd->cp_hqd_pq_doorbell_control = 0; 4683 } 4684 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 4685 mqd->cp_hqd_pq_doorbell_control); 4686 4687 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4688 ring->wptr = 0; 4689 mqd->cp_hqd_pq_wptr = ring->wptr; 4690 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 4691 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4692 4693 /* set the vmid for the queue */ 4694 mqd->cp_hqd_vmid = 0; 4695 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); 4696 4697 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4698 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4699 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); 4700 mqd->cp_hqd_persistent_state = tmp; 4701 if (adev->asic_type == CHIP_STONEY || 4702 adev->asic_type == CHIP_POLARIS11 || 4703 adev->asic_type == CHIP_POLARIS10) { 4704 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); 4705 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); 4706 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); 4707 } 4708 4709 /* activate the queue */ 4710 mqd->cp_hqd_active = 1; 4711 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); 4712 4713 vi_srbm_select(adev, 0, 0, 0, 0); 4714 mutex_unlock(&adev->srbm_mutex); 4715 4716 amdgpu_bo_kunmap(ring->mqd_obj); 4717 amdgpu_bo_unreserve(ring->mqd_obj); 4718 } 4719 4720 if (use_doorbell) { 4721 tmp = RREG32(mmCP_PQ_STATUS); 4722 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4723 WREG32(mmCP_PQ_STATUS, tmp); 4724 } 4725 4726 gfx_v8_0_cp_compute_enable(adev, true); 4727 4728 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4729 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4730 4731 ring->ready = true; 4732 r = amdgpu_ring_test_ring(ring); 4733 if (r) 4734 ring->ready = false; 4735 } 4736 4737 return 0; 4738 } 4739 4740 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 4741 { 4742 int r; 4743 4744 if (!(adev->flags & AMD_IS_APU)) 4745 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4746 4747 if (!adev->pp_enabled) { 4748 if (!adev->firmware.smu_load) { 4749 /* legacy firmware loading */ 4750 r = gfx_v8_0_cp_gfx_load_microcode(adev); 4751 if (r) 4752 return r; 4753 4754 r = gfx_v8_0_cp_compute_load_microcode(adev); 4755 if (r) 4756 return r; 4757 } else { 4758 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 4759 AMDGPU_UCODE_ID_CP_CE); 4760 if (r) 4761 return -EINVAL; 4762 4763 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 4764 AMDGPU_UCODE_ID_CP_PFP); 4765 if (r) 4766 return -EINVAL; 4767 4768 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 4769 AMDGPU_UCODE_ID_CP_ME); 4770 if (r) 4771 return -EINVAL; 4772 4773 if (adev->asic_type == CHIP_TOPAZ) { 4774 r = gfx_v8_0_cp_compute_load_microcode(adev); 4775 if (r) 4776 return r; 4777 } else { 4778 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 4779 AMDGPU_UCODE_ID_CP_MEC1); 4780 if (r) 4781 
return -EINVAL; 4782 } 4783 } 4784 } 4785 4786 r = gfx_v8_0_cp_gfx_resume(adev); 4787 if (r) 4788 return r; 4789 4790 r = gfx_v8_0_cp_compute_resume(adev); 4791 if (r) 4792 return r; 4793 4794 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 4795 4796 return 0; 4797 } 4798 4799 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 4800 { 4801 gfx_v8_0_cp_gfx_enable(adev, enable); 4802 gfx_v8_0_cp_compute_enable(adev, enable); 4803 } 4804 4805 static int gfx_v8_0_hw_init(void *handle) 4806 { 4807 int r; 4808 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4809 4810 gfx_v8_0_init_golden_registers(adev); 4811 gfx_v8_0_gpu_init(adev); 4812 4813 r = gfx_v8_0_rlc_resume(adev); 4814 if (r) 4815 return r; 4816 4817 r = gfx_v8_0_cp_resume(adev); 4818 4819 return r; 4820 } 4821 4822 static int gfx_v8_0_hw_fini(void *handle) 4823 { 4824 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4825 4826 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4827 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4828 if (amdgpu_sriov_vf(adev)) { 4829 pr_debug("For SRIOV client, shouldn't do anything.\n"); 4830 return 0; 4831 } 4832 gfx_v8_0_cp_enable(adev, false); 4833 gfx_v8_0_rlc_stop(adev); 4834 gfx_v8_0_cp_compute_fini(adev); 4835 4836 amdgpu_set_powergating_state(adev, 4837 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); 4838 4839 return 0; 4840 } 4841 4842 static int gfx_v8_0_suspend(void *handle) 4843 { 4844 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4845 4846 return gfx_v8_0_hw_fini(adev); 4847 } 4848 4849 static int gfx_v8_0_resume(void *handle) 4850 { 4851 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4852 4853 return gfx_v8_0_hw_init(adev); 4854 } 4855 4856 static bool gfx_v8_0_is_idle(void *handle) 4857 { 4858 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4859 4860 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 4861 return false; 4862 else 4863 return true; 4864 } 4865 4866 static int gfx_v8_0_wait_for_idle(void *handle) 4867 { 4868 unsigned i; 4869 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4870 4871 for (i = 0; i < adev->usec_timeout; i++) { 4872 if (gfx_v8_0_is_idle(handle)) 4873 return 0; 4874 4875 udelay(1); 4876 } 4877 return -ETIMEDOUT; 4878 } 4879 4880 static bool gfx_v8_0_check_soft_reset(void *handle) 4881 { 4882 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4883 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 4884 u32 tmp; 4885 4886 /* GRBM_STATUS */ 4887 tmp = RREG32(mmGRBM_STATUS); 4888 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4889 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4890 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4891 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4892 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4893 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 4894 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4895 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4896 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4897 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4898 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4899 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 4900 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 4901 } 4902 4903 /* GRBM_STATUS2 */ 4904 tmp = RREG32(mmGRBM_STATUS2); 4905 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 4906 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4907 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4908 4909 if 
(REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 4910 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 4911 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 4912 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4913 SOFT_RESET_CPF, 1); 4914 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4915 SOFT_RESET_CPC, 1); 4916 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4917 SOFT_RESET_CPG, 1); 4918 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 4919 SOFT_RESET_GRBM, 1); 4920 } 4921 4922 /* SRBM_STATUS */ 4923 tmp = RREG32(mmSRBM_STATUS); 4924 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 4925 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 4926 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 4927 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 4928 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 4929 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 4930 4931 if (grbm_soft_reset || srbm_soft_reset) { 4932 adev->gfx.grbm_soft_reset = grbm_soft_reset; 4933 adev->gfx.srbm_soft_reset = srbm_soft_reset; 4934 return true; 4935 } else { 4936 adev->gfx.grbm_soft_reset = 0; 4937 adev->gfx.srbm_soft_reset = 0; 4938 return false; 4939 } 4940 } 4941 4942 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev, 4943 struct amdgpu_ring *ring) 4944 { 4945 int i; 4946 4947 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4948 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 4949 u32 tmp; 4950 tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); 4951 tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST, 4952 DEQUEUE_REQ, 2); 4953 WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp); 4954 for (i = 0; i < adev->usec_timeout; i++) { 4955 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 4956 break; 4957 udelay(1); 4958 } 4959 } 4960 } 4961 4962 static int gfx_v8_0_pre_soft_reset(void *handle) 4963 { 4964 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4965 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 4966 4967 if ((!adev->gfx.grbm_soft_reset) && 4968 (!adev->gfx.srbm_soft_reset)) 4969 return 0; 4970 4971 grbm_soft_reset = adev->gfx.grbm_soft_reset; 4972 srbm_soft_reset = adev->gfx.srbm_soft_reset; 4973 4974 /* stop the rlc */ 4975 gfx_v8_0_rlc_stop(adev); 4976 4977 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 4978 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 4979 /* Disable GFX parsing/prefetching */ 4980 gfx_v8_0_cp_gfx_enable(adev, false); 4981 4982 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 4983 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 4984 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 4985 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 4986 int i; 4987 4988 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4989 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4990 4991 gfx_v8_0_inactive_hqd(adev, ring); 4992 } 4993 /* Disable MEC parsing/prefetching */ 4994 gfx_v8_0_cp_compute_enable(adev, false); 4995 } 4996 4997 return 0; 4998 } 4999 5000 static int gfx_v8_0_soft_reset(void *handle) 5001 { 5002 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5003 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5004 u32 tmp; 5005 5006 if ((!adev->gfx.grbm_soft_reset) && 5007 (!adev->gfx.srbm_soft_reset)) 5008 return 0; 5009 5010 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5011 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5012 5013 if (grbm_soft_reset || srbm_soft_reset) { 5014 tmp = 
RREG32(mmGMCON_DEBUG); 5015 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5016 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5017 WREG32(mmGMCON_DEBUG, tmp); 5018 udelay(50); 5019 } 5020 5021 if (grbm_soft_reset) { 5022 tmp = RREG32(mmGRBM_SOFT_RESET); 5023 tmp |= grbm_soft_reset; 5024 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5025 WREG32(mmGRBM_SOFT_RESET, tmp); 5026 tmp = RREG32(mmGRBM_SOFT_RESET); 5027 5028 udelay(50); 5029 5030 tmp &= ~grbm_soft_reset; 5031 WREG32(mmGRBM_SOFT_RESET, tmp); 5032 tmp = RREG32(mmGRBM_SOFT_RESET); 5033 } 5034 5035 if (srbm_soft_reset) { 5036 tmp = RREG32(mmSRBM_SOFT_RESET); 5037 tmp |= srbm_soft_reset; 5038 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5039 WREG32(mmSRBM_SOFT_RESET, tmp); 5040 tmp = RREG32(mmSRBM_SOFT_RESET); 5041 5042 udelay(50); 5043 5044 tmp &= ~srbm_soft_reset; 5045 WREG32(mmSRBM_SOFT_RESET, tmp); 5046 tmp = RREG32(mmSRBM_SOFT_RESET); 5047 } 5048 5049 if (grbm_soft_reset || srbm_soft_reset) { 5050 tmp = RREG32(mmGMCON_DEBUG); 5051 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5052 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5053 WREG32(mmGMCON_DEBUG, tmp); 5054 } 5055 5056 /* Wait a little for things to settle down */ 5057 udelay(50); 5058 5059 return 0; 5060 } 5061 5062 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev, 5063 struct amdgpu_ring *ring) 5064 { 5065 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5066 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 5067 WREG32(mmCP_HQD_PQ_RPTR, 0); 5068 WREG32(mmCP_HQD_PQ_WPTR, 0); 5069 vi_srbm_select(adev, 0, 0, 0, 0); 5070 } 5071 5072 static int gfx_v8_0_post_soft_reset(void *handle) 5073 { 5074 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5075 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5076 5077 if ((!adev->gfx.grbm_soft_reset) && 5078 (!adev->gfx.srbm_soft_reset)) 5079 return 0; 5080 5081 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5082 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5083 5084 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5085 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5086 gfx_v8_0_cp_gfx_resume(adev); 5087 5088 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5089 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5090 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5091 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5092 int i; 5093 5094 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5095 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5096 5097 gfx_v8_0_init_hqd(adev, ring); 5098 } 5099 gfx_v8_0_cp_compute_resume(adev); 5100 } 5101 gfx_v8_0_rlc_start(adev); 5102 5103 return 0; 5104 } 5105 5106 /** 5107 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5108 * 5109 * @adev: amdgpu_device pointer 5110 * 5111 * Fetches a GPU clock counter snapshot. 5112 * Returns the 64 bit clock counter snapshot. 
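 * Writing mmRLC_CAPTURE_GPU_CLOCK_COUNT asks the RLC to latch the
 * free-running counter, so the following LSB/MSB reads form one consistent
 * 64 bit sample; gpu_clock_mutex serializes concurrent captures. Callers
 * normally reach this through adev->gfx.funcs->get_gpu_clock_counter().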
5113 */ 5114 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5115 { 5116 uint64_t clock; 5117 5118 mutex_lock(&adev->gfx.gpu_clock_mutex); 5119 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5120 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5121 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5122 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5123 return clock; 5124 } 5125 5126 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5127 uint32_t vmid, 5128 uint32_t gds_base, uint32_t gds_size, 5129 uint32_t gws_base, uint32_t gws_size, 5130 uint32_t oa_base, uint32_t oa_size) 5131 { 5132 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 5133 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 5134 5135 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 5136 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 5137 5138 oa_base = oa_base >> AMDGPU_OA_SHIFT; 5139 oa_size = oa_size >> AMDGPU_OA_SHIFT; 5140 5141 /* GDS Base */ 5142 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5143 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5144 WRITE_DATA_DST_SEL(0))); 5145 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5146 amdgpu_ring_write(ring, 0); 5147 amdgpu_ring_write(ring, gds_base); 5148 5149 /* GDS Size */ 5150 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5151 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5152 WRITE_DATA_DST_SEL(0))); 5153 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5154 amdgpu_ring_write(ring, 0); 5155 amdgpu_ring_write(ring, gds_size); 5156 5157 /* GWS */ 5158 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5159 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5160 WRITE_DATA_DST_SEL(0))); 5161 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5162 amdgpu_ring_write(ring, 0); 5163 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5164 5165 /* OA */ 5166 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5167 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5168 WRITE_DATA_DST_SEL(0))); 5169 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5170 amdgpu_ring_write(ring, 0); 5171 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5172 } 5173 5174 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5175 { 5176 WREG32(mmSQ_IND_INDEX, 5177 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5178 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5179 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5180 (SQ_IND_INDEX__FORCE_READ_MASK)); 5181 return RREG32(mmSQ_IND_DATA); 5182 } 5183 5184 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5185 uint32_t wave, uint32_t thread, 5186 uint32_t regno, uint32_t num, uint32_t *out) 5187 { 5188 WREG32(mmSQ_IND_INDEX, 5189 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5190 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5191 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5192 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5193 (SQ_IND_INDEX__FORCE_READ_MASK) | 5194 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5195 while (num--) 5196 *(out++) = RREG32(mmSQ_IND_DATA); 5197 } 5198 5199 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5200 { 5201 /* type 0 wave data */ 5202 dst[(*no_fields)++] = 0; 5203 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5204 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5205 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, 
ixSQ_WAVE_PC_HI); 5206 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5207 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5208 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5209 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5210 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5211 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5212 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5213 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5214 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5215 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5216 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5217 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5218 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5219 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5220 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5221 } 5222 5223 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5224 uint32_t wave, uint32_t start, 5225 uint32_t size, uint32_t *dst) 5226 { 5227 wave_read_regs( 5228 adev, simd, wave, 0, 5229 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5230 } 5231 5232 5233 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5234 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5235 .select_se_sh = &gfx_v8_0_select_se_sh, 5236 .read_wave_data = &gfx_v8_0_read_wave_data, 5237 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5238 }; 5239 5240 static int gfx_v8_0_early_init(void *handle) 5241 { 5242 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5243 5244 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5245 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS; 5246 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5247 gfx_v8_0_set_ring_funcs(adev); 5248 gfx_v8_0_set_irq_funcs(adev); 5249 gfx_v8_0_set_gds_init(adev); 5250 gfx_v8_0_set_rlc_funcs(adev); 5251 5252 return 0; 5253 } 5254 5255 static int gfx_v8_0_late_init(void *handle) 5256 { 5257 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5258 int r; 5259 5260 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5261 if (r) 5262 return r; 5263 5264 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5265 if (r) 5266 return r; 5267 5268 /* requires IBs so do in late init after IB pool is initialized */ 5269 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5270 if (r) 5271 return r; 5272 5273 amdgpu_set_powergating_state(adev, 5274 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); 5275 5276 return 0; 5277 } 5278 5279 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5280 bool enable) 5281 { 5282 if (adev->asic_type == CHIP_POLARIS11) 5283 /* Send msg to SMU via Powerplay */ 5284 amdgpu_set_powergating_state(adev, 5285 AMD_IP_BLOCK_TYPE_SMC, 5286 enable ? 5287 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); 5288 5289 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5290 } 5291 5292 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5293 bool enable) 5294 { 5295 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 
1 : 0); 5296 } 5297 5298 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5299 bool enable) 5300 { 5301 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5302 } 5303 5304 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5305 bool enable) 5306 { 5307 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5308 } 5309 5310 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5311 bool enable) 5312 { 5313 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5314 5315 /* Read any GFX register to wake up GFX. */ 5316 if (!enable) 5317 RREG32(mmDB_RENDER_CONTROL); 5318 } 5319 5320 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5321 bool enable) 5322 { 5323 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5324 cz_enable_gfx_cg_power_gating(adev, true); 5325 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5326 cz_enable_gfx_pipeline_power_gating(adev, true); 5327 } else { 5328 cz_enable_gfx_cg_power_gating(adev, false); 5329 cz_enable_gfx_pipeline_power_gating(adev, false); 5330 } 5331 } 5332 5333 static int gfx_v8_0_set_powergating_state(void *handle, 5334 enum amd_powergating_state state) 5335 { 5336 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5337 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 5338 5339 switch (adev->asic_type) { 5340 case CHIP_CARRIZO: 5341 case CHIP_STONEY: 5342 5343 cz_update_gfx_cg_power_gating(adev, enable); 5344 5345 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5346 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5347 else 5348 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5349 5350 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5351 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5352 else 5353 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5354 break; 5355 case CHIP_POLARIS11: 5356 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5357 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5358 else 5359 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5360 5361 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5362 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5363 else 5364 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5365 5366 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) 5367 polaris11_enable_gfx_quick_mg_power_gating(adev, true); 5368 else 5369 polaris11_enable_gfx_quick_mg_power_gating(adev, false); 5370 break; 5371 default: 5372 break; 5373 } 5374 5375 return 0; 5376 } 5377 5378 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5379 uint32_t reg_addr, uint32_t cmd) 5380 { 5381 uint32_t data; 5382 5383 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5384 5385 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5386 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5387 5388 data = RREG32(mmRLC_SERDES_WR_CTRL); 5389 if (adev->asic_type == CHIP_STONEY) 5390 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5391 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5392 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5393 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5394 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5395 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5396 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5397 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5398 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5399 else 5400 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5401 
RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5402 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5403 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5404 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5405 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5406 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5407 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5408 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5409 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5410 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5411 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5412 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5413 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5414 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5415 5416 WREG32(mmRLC_SERDES_WR_CTRL, data); 5417 } 5418 5419 #define MSG_ENTER_RLC_SAFE_MODE 1 5420 #define MSG_EXIT_RLC_SAFE_MODE 0 5421 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5422 #define RLC_GPR_REG2__REQ__SHIFT 0 5423 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5424 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5425 5426 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev) 5427 { 5428 u32 data = 0; 5429 unsigned i; 5430 5431 data = RREG32(mmRLC_CNTL); 5432 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) 5433 return; 5434 5435 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || 5436 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | 5437 AMD_PG_SUPPORT_GFX_DMG))) { 5438 data |= RLC_GPR_REG2__REQ_MASK; 5439 data &= ~RLC_GPR_REG2__MESSAGE_MASK; 5440 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); 5441 WREG32(mmRLC_GPR_REG2, data); 5442 5443 for (i = 0; i < adev->usec_timeout; i++) { 5444 if ((RREG32(mmRLC_GPM_STAT) & 5445 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5446 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5447 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5448 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5449 break; 5450 udelay(1); 5451 } 5452 5453 for (i = 0; i < adev->usec_timeout; i++) { 5454 if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ)) 5455 break; 5456 udelay(1); 5457 } 5458 adev->gfx.rlc.in_safe_mode = true; 5459 } 5460 } 5461 5462 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev) 5463 { 5464 u32 data; 5465 unsigned i; 5466 5467 data = RREG32(mmRLC_CNTL); 5468 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) 5469 return; 5470 5471 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || 5472 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | 5473 AMD_PG_SUPPORT_GFX_DMG))) { 5474 data |= RLC_GPR_REG2__REQ_MASK; 5475 data &= ~RLC_GPR_REG2__MESSAGE_MASK; 5476 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); 5477 WREG32(mmRLC_GPR_REG2, data); 5478 adev->gfx.rlc.in_safe_mode = false; 5479 } 5480 5481 for (i = 0; i < adev->usec_timeout; i++) { 5482 if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ)) 5483 break; 5484 udelay(1); 5485 } 5486 } 5487 5488 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5489 { 5490 u32 data; 5491 unsigned i; 5492 5493 data = RREG32(mmRLC_CNTL); 5494 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5495 return; 5496 5497 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5498 data |= RLC_SAFE_MODE__CMD_MASK; 5499 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5500 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5501 WREG32(mmRLC_SAFE_MODE, data); 5502 5503 for (i = 0; i < adev->usec_timeout; i++) { 5504 if ((RREG32(mmRLC_GPM_STAT) & 5505 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5506 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5507 
(RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5508 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5509 break; 5510 udelay(1); 5511 } 5512 5513 for (i = 0; i < adev->usec_timeout; i++) { 5514 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5515 break; 5516 udelay(1); 5517 } 5518 adev->gfx.rlc.in_safe_mode = true; 5519 } 5520 } 5521 5522 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5523 { 5524 u32 data = 0; 5525 unsigned i; 5526 5527 data = RREG32(mmRLC_CNTL); 5528 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5529 return; 5530 5531 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5532 if (adev->gfx.rlc.in_safe_mode) { 5533 data |= RLC_SAFE_MODE__CMD_MASK; 5534 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5535 WREG32(mmRLC_SAFE_MODE, data); 5536 adev->gfx.rlc.in_safe_mode = false; 5537 } 5538 } 5539 5540 for (i = 0; i < adev->usec_timeout; i++) { 5541 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5542 break; 5543 udelay(1); 5544 } 5545 } 5546 5547 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev) 5548 { 5549 adev->gfx.rlc.in_safe_mode = true; 5550 } 5551 5552 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev) 5553 { 5554 adev->gfx.rlc.in_safe_mode = false; 5555 } 5556 5557 static const struct amdgpu_rlc_funcs cz_rlc_funcs = { 5558 .enter_safe_mode = cz_enter_rlc_safe_mode, 5559 .exit_safe_mode = cz_exit_rlc_safe_mode 5560 }; 5561 5562 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5563 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5564 .exit_safe_mode = iceland_exit_rlc_safe_mode 5565 }; 5566 5567 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = { 5568 .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode, 5569 .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode 5570 }; 5571 5572 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5573 bool enable) 5574 { 5575 uint32_t temp, data; 5576 5577 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5578 5579 /* It is disabled by HW by default */ 5580 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5581 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5582 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 5583 /* 1 - RLC memory Light sleep */ 5584 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 5585 5586 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 5587 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 5588 } 5589 5590 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5591 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5592 if (adev->flags & AMD_IS_APU) 5593 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5594 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5595 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5596 else 5597 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5598 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5599 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5600 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5601 5602 if (temp != data) 5603 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5604 5605 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5606 gfx_v8_0_wait_for_rlc_serdes(adev); 5607 5608 /* 5 - clear mgcg override */ 5609 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5610 5611 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5612 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 5613 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5614 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5615 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5616 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 5617 data &= 
~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * === MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * === CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}

static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
						  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_CG,
				      pp_support_state,
				      pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_MG,
				      pp_support_state,
				      pp_state);
amd_set_clockgating_by_smu(pp_handle, msg_id); 5830 } 5831 5832 return 0; 5833 } 5834 5835 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 5836 enum amd_clockgating_state state) 5837 { 5838 5839 uint32_t msg_id, pp_state = 0; 5840 uint32_t pp_support_state = 0; 5841 void *pp_handle = adev->powerplay.pp_handle; 5842 5843 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 5844 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5845 pp_support_state = PP_STATE_SUPPORT_LS; 5846 pp_state = PP_STATE_LS; 5847 } 5848 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5849 pp_support_state |= PP_STATE_SUPPORT_CG; 5850 pp_state |= PP_STATE_CG; 5851 } 5852 if (state == AMD_CG_STATE_UNGATE) 5853 pp_state = 0; 5854 5855 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5856 PP_BLOCK_GFX_CG, 5857 pp_support_state, 5858 pp_state); 5859 amd_set_clockgating_by_smu(pp_handle, msg_id); 5860 } 5861 5862 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 5863 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 5864 pp_support_state = PP_STATE_SUPPORT_LS; 5865 pp_state = PP_STATE_LS; 5866 } 5867 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 5868 pp_support_state |= PP_STATE_SUPPORT_CG; 5869 pp_state |= PP_STATE_CG; 5870 } 5871 if (state == AMD_CG_STATE_UNGATE) 5872 pp_state = 0; 5873 5874 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5875 PP_BLOCK_GFX_3D, 5876 pp_support_state, 5877 pp_state); 5878 amd_set_clockgating_by_smu(pp_handle, msg_id); 5879 } 5880 5881 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 5882 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5883 pp_support_state = PP_STATE_SUPPORT_LS; 5884 pp_state = PP_STATE_LS; 5885 } 5886 5887 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5888 pp_support_state |= PP_STATE_SUPPORT_CG; 5889 pp_state |= PP_STATE_CG; 5890 } 5891 5892 if (state == AMD_CG_STATE_UNGATE) 5893 pp_state = 0; 5894 5895 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5896 PP_BLOCK_GFX_MG, 5897 pp_support_state, 5898 pp_state); 5899 amd_set_clockgating_by_smu(pp_handle, msg_id); 5900 } 5901 5902 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 5903 pp_support_state = PP_STATE_SUPPORT_LS; 5904 5905 if (state == AMD_CG_STATE_UNGATE) 5906 pp_state = 0; 5907 else 5908 pp_state = PP_STATE_LS; 5909 5910 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5911 PP_BLOCK_GFX_RLC, 5912 pp_support_state, 5913 pp_state); 5914 amd_set_clockgating_by_smu(pp_handle, msg_id); 5915 } 5916 5917 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 5918 pp_support_state = PP_STATE_SUPPORT_LS; 5919 5920 if (state == AMD_CG_STATE_UNGATE) 5921 pp_state = 0; 5922 else 5923 pp_state = PP_STATE_LS; 5924 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5925 PP_BLOCK_GFX_CP, 5926 pp_support_state, 5927 pp_state); 5928 amd_set_clockgating_by_smu(pp_handle, msg_id); 5929 } 5930 5931 return 0; 5932 } 5933 5934 static int gfx_v8_0_set_clockgating_state(void *handle, 5935 enum amd_clockgating_state state) 5936 { 5937 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5938 5939 switch (adev->asic_type) { 5940 case CHIP_FIJI: 5941 case CHIP_CARRIZO: 5942 case CHIP_STONEY: 5943 gfx_v8_0_update_gfx_clock_gating(adev, 5944 state == AMD_CG_STATE_GATE ? 
true : false); 5945 break; 5946 case CHIP_TONGA: 5947 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 5948 break; 5949 case CHIP_POLARIS10: 5950 case CHIP_POLARIS11: 5951 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 5952 break; 5953 default: 5954 break; 5955 } 5956 return 0; 5957 } 5958 5959 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 5960 { 5961 return ring->adev->wb.wb[ring->rptr_offs]; 5962 } 5963 5964 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5965 { 5966 struct amdgpu_device *adev = ring->adev; 5967 5968 if (ring->use_doorbell) 5969 /* XXX check if swapping is necessary on BE */ 5970 return ring->adev->wb.wb[ring->wptr_offs]; 5971 else 5972 return RREG32(mmCP_RB0_WPTR); 5973 } 5974 5975 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5976 { 5977 struct amdgpu_device *adev = ring->adev; 5978 5979 if (ring->use_doorbell) { 5980 /* XXX check if swapping is necessary on BE */ 5981 adev->wb.wb[ring->wptr_offs] = ring->wptr; 5982 WDOORBELL32(ring->doorbell_index, ring->wptr); 5983 } else { 5984 WREG32(mmCP_RB0_WPTR, ring->wptr); 5985 (void)RREG32(mmCP_RB0_WPTR); 5986 } 5987 } 5988 5989 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5990 { 5991 u32 ref_and_mask, reg_mem_engine; 5992 5993 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5994 switch (ring->me) { 5995 case 1: 5996 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 5997 break; 5998 case 2: 5999 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6000 break; 6001 default: 6002 return; 6003 } 6004 reg_mem_engine = 0; 6005 } else { 6006 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6007 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6008 } 6009 6010 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6011 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6012 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6013 reg_mem_engine)); 6014 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 6015 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6016 amdgpu_ring_write(ring, ref_and_mask); 6017 amdgpu_ring_write(ring, ref_and_mask); 6018 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6019 } 6020 6021 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) 6022 { 6023 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6024 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) | 6025 EVENT_INDEX(4)); 6026 6027 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6028 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) | 6029 EVENT_INDEX(0)); 6030 } 6031 6032 6033 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) 6034 { 6035 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6036 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6037 WRITE_DATA_DST_SEL(0) | 6038 WR_CONFIRM)); 6039 amdgpu_ring_write(ring, mmHDP_DEBUG0); 6040 amdgpu_ring_write(ring, 0); 6041 amdgpu_ring_write(ring, 1); 6042 6043 } 6044 6045 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6046 struct amdgpu_ib *ib, 6047 unsigned vm_id, bool ctx_switch) 6048 { 6049 u32 header, control = 0; 6050 6051 if (ib->flags & AMDGPU_IB_FLAG_CE) 6052 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 6053 else 6054 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 6055 6056 control |= ib->length_dw | (vm_id << 24); 6057 6058 amdgpu_ring_write(ring, header); 6059 amdgpu_ring_write(ring, 6060 #ifdef __BIG_ENDIAN 6061 (2 << 0) | 6062 #endif 6063 (ib->gpu_addr & 0xFFFFFFFC)); 6064 amdgpu_ring_write(ring, 
upper_32_bits(ib->gpu_addr) & 0xFFFF); 6065 amdgpu_ring_write(ring, control); 6066 } 6067 6068 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 6069 struct amdgpu_ib *ib, 6070 unsigned vm_id, bool ctx_switch) 6071 { 6072 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); 6073 6074 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 6075 amdgpu_ring_write(ring, 6076 #ifdef __BIG_ENDIAN 6077 (2 << 0) | 6078 #endif 6079 (ib->gpu_addr & 0xFFFFFFFC)); 6080 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6081 amdgpu_ring_write(ring, control); 6082 } 6083 6084 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, 6085 u64 seq, unsigned flags) 6086 { 6087 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6088 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6089 6090 /* EVENT_WRITE_EOP - flush caches, send int */ 6091 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 6092 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6093 EOP_TC_ACTION_EN | 6094 EOP_TC_WB_ACTION_EN | 6095 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6096 EVENT_INDEX(5))); 6097 amdgpu_ring_write(ring, addr & 0xfffffffc); 6098 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 6099 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6100 amdgpu_ring_write(ring, lower_32_bits(seq)); 6101 amdgpu_ring_write(ring, upper_32_bits(seq)); 6102 6103 } 6104 6105 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 6106 { 6107 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6108 uint32_t seq = ring->fence_drv.sync_seq; 6109 uint64_t addr = ring->fence_drv.gpu_addr; 6110 6111 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6112 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ 6113 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 6114 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ 6115 amdgpu_ring_write(ring, addr & 0xfffffffc); 6116 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 6117 amdgpu_ring_write(ring, seq); 6118 amdgpu_ring_write(ring, 0xffffffff); 6119 amdgpu_ring_write(ring, 4); /* poll interval */ 6120 } 6121 6122 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 6123 unsigned vm_id, uint64_t pd_addr) 6124 { 6125 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6126 6127 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6128 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 6129 WRITE_DATA_DST_SEL(0)) | 6130 WR_CONFIRM); 6131 if (vm_id < 8) { 6132 amdgpu_ring_write(ring, 6133 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); 6134 } else { 6135 amdgpu_ring_write(ring, 6136 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); 6137 } 6138 amdgpu_ring_write(ring, 0); 6139 amdgpu_ring_write(ring, pd_addr >> 12); 6140 6141 /* bits 0-15 are the VM contexts0-15 */ 6142 /* invalidate the cache */ 6143 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6144 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6145 WRITE_DATA_DST_SEL(0))); 6146 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 6147 amdgpu_ring_write(ring, 0); 6148 amdgpu_ring_write(ring, 1 << vm_id); 6149 6150 /* wait for the invalidate to complete */ 6151 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6152 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ 6153 WAIT_REG_MEM_FUNCTION(0) | /* always */ 6154 WAIT_REG_MEM_ENGINE(0))); /* me */ 6155 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 6156 amdgpu_ring_write(ring, 0); 6157 amdgpu_ring_write(ring, 
0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits a 128 dw nop to prevent the CE from accessing the
		 * VM before the vm_flush finishes */
		amdgpu_ring_insert_nop(ring, 128);
	}
}

static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if a preamble is presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if a preamble is presented for the first
		 * time, although no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe. All other
	 * pipes' interrupts are set by amdkfd.
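	 * Here me selects the micro engine (1 = MEC1, 2 = MEC2) and each ME
	 * exposes four pipes; gfx_v8_0_eop_irq() below decodes the same
	 * me/pipe/queue layout out of the IV entry's ring_id.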
6257 */ 6258 6259 if (me == 1) { 6260 switch (pipe) { 6261 case 0: 6262 break; 6263 default: 6264 DRM_DEBUG("invalid pipe %d\n", pipe); 6265 return; 6266 } 6267 } else { 6268 DRM_DEBUG("invalid me %d\n", me); 6269 return; 6270 } 6271 6272 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 6273 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6274 } 6275 6276 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6277 struct amdgpu_irq_src *source, 6278 unsigned type, 6279 enum amdgpu_interrupt_state state) 6280 { 6281 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, 6282 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6283 6284 return 0; 6285 } 6286 6287 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6288 struct amdgpu_irq_src *source, 6289 unsigned type, 6290 enum amdgpu_interrupt_state state) 6291 { 6292 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, 6293 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6294 6295 return 0; 6296 } 6297 6298 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6299 struct amdgpu_irq_src *src, 6300 unsigned type, 6301 enum amdgpu_interrupt_state state) 6302 { 6303 switch (type) { 6304 case AMDGPU_CP_IRQ_GFX_EOP: 6305 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state); 6306 break; 6307 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6308 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6309 break; 6310 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6311 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6312 break; 6313 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6314 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6315 break; 6316 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6317 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6318 break; 6319 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6320 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6321 break; 6322 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6323 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6324 break; 6325 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6326 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6327 break; 6328 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6329 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6330 break; 6331 default: 6332 break; 6333 } 6334 return 0; 6335 } 6336 6337 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, 6338 struct amdgpu_irq_src *source, 6339 struct amdgpu_iv_entry *entry) 6340 { 6341 int i; 6342 u8 me_id, pipe_id, queue_id; 6343 struct amdgpu_ring *ring; 6344 6345 DRM_DEBUG("IH: CP EOP\n"); 6346 me_id = (entry->ring_id & 0x0c) >> 2; 6347 pipe_id = (entry->ring_id & 0x03) >> 0; 6348 queue_id = (entry->ring_id & 0x70) >> 4; 6349 6350 switch (me_id) { 6351 case 0: 6352 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6353 break; 6354 case 1: 6355 case 2: 6356 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6357 ring = &adev->gfx.compute_ring[i]; 6358 /* Per-queue interrupt is supported for MEC starting from VI. 6359 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
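	 * The handler therefore matches me/pipe/queue from the IV entry
	 * against every compute ring and signals only the ring that owns the
	 * queue.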
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupts are supported for MEC starting
			 * from VI, but they can only be enabled/disabled per
			 * pipe, not per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 + /* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 + /* gfx_v8_ring_emit_sb */
		3 + 4, /* gfx_v8_ring_emit_cntxcntl including vgt flush */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
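/*
 * For reference: the emit_frame_size sums above reserve worst-case ring space
 * per submission; they work out to 213 dwords for the gfx ring and 77 dwords
 * for a compute ring.
 */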
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
		break;
	case CHIP_STONEY:
	case CHIP_CARRIZO:
		adev->gfx.rlc.funcs = &cz_rlc_funcs;
		break;
	default:
		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
		break;
	}
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < 16; k++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
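/*
 * Note: both exported IP block versions share gfx_v8_0_ip_funcs; only the
 * minor number distinguishes the GFX 8.0 and 8.1 variants, and the per-ASIC
 * SoC setup code (presumably vi.c) picks which one to register.
 */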