1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 */ 23 #include <linux/firmware.h> 24 #include "drmP.h" 25 #include "amdgpu.h" 26 #include "amdgpu_gfx.h" 27 #include "vi.h" 28 #include "vid.h" 29 #include "amdgpu_ucode.h" 30 #include "amdgpu_atombios.h" 31 #include "clearstate_vi.h" 32 33 #include "gmc/gmc_8_2_d.h" 34 #include "gmc/gmc_8_2_sh_mask.h" 35 36 #include "oss/oss_3_0_d.h" 37 #include "oss/oss_3_0_sh_mask.h" 38 39 #include "bif/bif_5_0_d.h" 40 #include "bif/bif_5_0_sh_mask.h" 41 42 #include "gca/gfx_8_0_d.h" 43 #include "gca/gfx_8_0_enum.h" 44 #include "gca/gfx_8_0_sh_mask.h" 45 #include "gca/gfx_8_0_enum.h" 46 47 #include "dce/dce_10_0_d.h" 48 #include "dce/dce_10_0_sh_mask.h" 49 50 #define GFX8_NUM_GFX_RINGS 1 51 #define GFX8_NUM_COMPUTE_RINGS 8 52 53 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 54 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 55 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002 56 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 57 58 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) 59 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT) 60 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT) 61 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT) 62 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT) 63 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT) 64 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT) 65 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT) 66 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT) 67 68 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L 69 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L 70 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L 71 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L 72 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L 73 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L 74 75 /* BPM SERDES CMD */ 76 #define SET_BPM_SERDES_CMD 
1 77 #define CLE_BPM_SERDES_CMD 0 78 79 /* BPM Register Address*/ 80 enum { 81 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */ 82 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */ 83 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */ 84 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */ 85 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */ 86 BPM_REG_FGCG_MAX 87 }; 88 89 #define RLC_FormatDirectRegListLength 14 90 91 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin"); 92 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin"); 93 MODULE_FIRMWARE("amdgpu/carrizo_me.bin"); 94 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin"); 95 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin"); 96 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin"); 97 98 MODULE_FIRMWARE("amdgpu/stoney_ce.bin"); 99 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin"); 100 MODULE_FIRMWARE("amdgpu/stoney_me.bin"); 101 MODULE_FIRMWARE("amdgpu/stoney_mec.bin"); 102 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin"); 103 104 MODULE_FIRMWARE("amdgpu/tonga_ce.bin"); 105 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin"); 106 MODULE_FIRMWARE("amdgpu/tonga_me.bin"); 107 MODULE_FIRMWARE("amdgpu/tonga_mec.bin"); 108 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin"); 109 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin"); 110 111 MODULE_FIRMWARE("amdgpu/topaz_ce.bin"); 112 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin"); 113 MODULE_FIRMWARE("amdgpu/topaz_me.bin"); 114 MODULE_FIRMWARE("amdgpu/topaz_mec.bin"); 115 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin"); 116 117 MODULE_FIRMWARE("amdgpu/fiji_ce.bin"); 118 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin"); 119 MODULE_FIRMWARE("amdgpu/fiji_me.bin"); 120 MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); 121 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); 122 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); 123 124 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); 125 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); 126 MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); 127 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); 128 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); 129 
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); 130 131 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); 132 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); 133 MODULE_FIRMWARE("amdgpu/polaris10_me.bin"); 134 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin"); 135 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); 136 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); 137 138 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = 139 { 140 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, 141 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1}, 142 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2}, 143 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3}, 144 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4}, 145 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5}, 146 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6}, 147 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7}, 148 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8}, 149 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9}, 150 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10}, 151 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11}, 152 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12}, 153 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13}, 154 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14}, 155 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15} 156 }; 157 158 static const u32 golden_settings_tonga_a11[] = 159 { 160 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208, 161 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 162 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 163 mmGB_GPU_ID, 0x0000000f, 0x00000000, 164 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 165 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc, 166 
mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 167 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 168 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 169 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 170 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 171 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb, 172 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b, 173 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876, 174 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 175 }; 176 177 static const u32 tonga_golden_common_all[] = 178 { 179 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 180 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 181 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 182 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 183 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 184 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 185 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 186 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 187 }; 188 189 static const u32 tonga_mgcg_cgcg_init[] = 190 { 191 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 192 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 193 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 194 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 195 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 196 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 197 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 198 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 199 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 200 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 201 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 202 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 203 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 204 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 205 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 206 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 207 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 208 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 209 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 210 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 211 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 
0x00000100, 212 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 213 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 214 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 215 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 216 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 217 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 218 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 219 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 220 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 221 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 222 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 223 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 224 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 225 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 226 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 227 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 228 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 229 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 230 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 231 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 232 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 233 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 234 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 235 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 236 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 237 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 238 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 239 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 240 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 241 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 242 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 243 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 244 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 245 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 246 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 247 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 248 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 249 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 
0x00040007, 250 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 251 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 252 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 253 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 254 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 255 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 256 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 257 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 258 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 259 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 260 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 261 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 262 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 263 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 264 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 265 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 266 }; 267 268 static const u32 golden_settings_polaris11_a11[] = 269 { 270 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208, 271 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 272 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 273 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 274 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 275 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 276 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 277 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 278 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 279 mmSQ_CONFIG, 0x07f80000, 0x07180000, 280 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 281 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 282 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3, 283 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 284 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210, 285 }; 286 287 static const u32 polaris11_golden_common_all[] = 288 { 289 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 290 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, 291 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 292 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 293 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 294 
mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 295 }; 296 297 static const u32 golden_settings_polaris10_a11[] = 298 { 299 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200, 300 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208, 301 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 302 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 303 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 304 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 305 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 306 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a, 307 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 308 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 309 mmSQ_CONFIG, 0x07f80000, 0x07180000, 310 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 311 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 312 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, 313 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 314 }; 315 316 static const u32 polaris10_golden_common_all[] = 317 { 318 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 319 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 320 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 321 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 322 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 323 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 324 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 325 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 326 }; 327 328 static const u32 fiji_golden_common_all[] = 329 { 330 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 331 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a, 332 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e, 333 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 334 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 335 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 336 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 337 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 338 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 339 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009, 340 }; 341 342 static const u32 golden_settings_fiji_a10[] = 343 { 344 
mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 345 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 346 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 347 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 348 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 349 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 350 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 351 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 352 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 353 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff, 354 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 355 }; 356 357 static const u32 fiji_mgcg_cgcg_init[] = 358 { 359 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 360 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 361 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 362 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 363 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 364 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 365 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 366 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 367 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 368 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 369 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 370 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 371 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 372 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 373 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 374 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 375 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 376 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 377 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 378 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 379 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 380 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 381 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 382 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 383 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 384 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 385 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 386 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 387 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 388 
mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 389 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 390 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 391 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 392 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 393 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 394 }; 395 396 static const u32 golden_settings_iceland_a11[] = 397 { 398 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 399 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 400 mmDB_DEBUG3, 0xc0000000, 0xc0000000, 401 mmGB_GPU_ID, 0x0000000f, 0x00000000, 402 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 403 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 404 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002, 405 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 406 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 407 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 408 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 409 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 410 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1, 411 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 412 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010, 413 }; 414 415 static const u32 iceland_golden_common_all[] = 416 { 417 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 418 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 419 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 420 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 421 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 422 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 423 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 424 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 425 }; 426 427 static const u32 iceland_mgcg_cgcg_init[] = 428 { 429 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 430 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 431 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 432 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 433 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100, 434 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100, 435 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100, 436 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 
0x00000100, 437 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 438 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 439 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 440 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 441 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 442 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 443 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 444 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 445 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 446 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 447 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 448 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 449 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 450 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 451 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100, 452 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 453 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 454 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 455 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 456 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 457 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 458 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 459 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 460 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 461 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 462 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 463 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 464 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 465 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 466 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 467 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 468 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 469 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 470 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 471 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 472 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 473 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 474 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 475 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 476 
mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 477 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 478 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 479 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 480 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 481 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 482 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 483 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 484 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 485 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 486 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 487 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 488 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 489 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 490 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 491 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 492 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 493 }; 494 495 static const u32 cz_golden_settings_a11[] = 496 { 497 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 498 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 499 mmGB_GPU_ID, 0x0000000f, 0x00000000, 500 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001, 501 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 502 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 503 mmTA_CNTL_AUX, 0x000f000f, 0x00010000, 504 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 505 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3, 506 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302 507 }; 508 509 static const u32 cz_golden_common_all[] = 510 { 511 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 512 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 513 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 514 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 515 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 516 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 517 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 518 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 519 }; 520 521 static const u32 cz_mgcg_cgcg_init[] = 522 { 523 
mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 524 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 525 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 526 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 527 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 528 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 529 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100, 530 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 531 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 532 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 533 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 534 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 535 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 536 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 537 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 538 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 539 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 540 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 541 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 542 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 543 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 544 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 545 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 546 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 547 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 548 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 549 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 550 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 551 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 552 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 553 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 554 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 555 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 556 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 557 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 558 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 559 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 560 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 561 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 562 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 563 
mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 564 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 565 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 566 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 567 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 568 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 569 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 570 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 571 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 572 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 573 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 574 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 575 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 576 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 577 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 578 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 579 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 580 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 581 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 582 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 583 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 584 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 585 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 586 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 587 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 588 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 589 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 590 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 591 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 592 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 593 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 594 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 595 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 596 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 597 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 598 }; 599 600 static const u32 stoney_golden_settings_a11[] = 601 { 602 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 603 mmGB_GPU_ID, 
0x0000000f, 0x00000000, 604 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 605 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 606 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 607 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 608 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 609 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 610 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1, 611 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010, 612 }; 613 614 static const u32 stoney_golden_common_all[] = 615 { 616 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 617 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000, 618 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 619 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001, 620 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 621 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 622 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 623 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 624 }; 625 626 static const u32 stoney_mgcg_cgcg_init[] = 627 { 628 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 629 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 630 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 631 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 632 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200, 633 mmATC_MISC_CG, 0xffffffff, 0x000c0200, 634 }; 635 636 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev); 637 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev); 638 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); 639 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); 640 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); 641 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); 642

/*
 * Program one golden register table. The entry count is derived from the
 * array itself, so @table must be an actual array, not a pointer.
 */
#define GFX8_PROGRAM_GOLDEN(adev, table) \
	amdgpu_program_register_sequence((adev), (table), \
					 (const u32)ARRAY_SIZE(table))

/**
 * gfx_v8_0_init_golden_registers - program the per-ASIC golden register tables
 * @adev: device whose asic_type selects the tables
 *
 * Topaz, Fiji, Tonga, Carrizo and Stoney each get three sequences, in this
 * order: the chip's *_mgcg_cgcg_init table, its golden settings table and its
 * *_golden_common_all table.  Polaris10 and Polaris11 get only the golden
 * settings table followed by the common table.  Any other asic_type is left
 * untouched.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		/* Topaz is programmed from the iceland_* tables. */
		GFX8_PROGRAM_GOLDEN(adev, iceland_mgcg_cgcg_init);
		GFX8_PROGRAM_GOLDEN(adev, golden_settings_iceland_a11);
		GFX8_PROGRAM_GOLDEN(adev, iceland_golden_common_all);
		break;
	case CHIP_FIJI:
		GFX8_PROGRAM_GOLDEN(adev, fiji_mgcg_cgcg_init);
		GFX8_PROGRAM_GOLDEN(adev, golden_settings_fiji_a10);
		GFX8_PROGRAM_GOLDEN(adev, fiji_golden_common_all);
		break;
	case CHIP_TONGA:
		GFX8_PROGRAM_GOLDEN(adev, tonga_mgcg_cgcg_init);
		GFX8_PROGRAM_GOLDEN(adev, golden_settings_tonga_a11);
		GFX8_PROGRAM_GOLDEN(adev, tonga_golden_common_all);
		break;
	case CHIP_POLARIS11:
		GFX8_PROGRAM_GOLDEN(adev, golden_settings_polaris11_a11);
		GFX8_PROGRAM_GOLDEN(adev, polaris11_golden_common_all);
		break;
	case CHIP_POLARIS10:
		GFX8_PROGRAM_GOLDEN(adev, golden_settings_polaris10_a11);
		GFX8_PROGRAM_GOLDEN(adev, polaris10_golden_common_all);
		break;
	case CHIP_CARRIZO:
		GFX8_PROGRAM_GOLDEN(adev, cz_mgcg_cgcg_init);
		GFX8_PROGRAM_GOLDEN(adev, cz_golden_settings_a11);
		GFX8_PROGRAM_GOLDEN(adev, cz_golden_common_all);
		break;
	case CHIP_STONEY:
		GFX8_PROGRAM_GOLDEN(adev, stoney_mgcg_cgcg_init);
		GFX8_PROGRAM_GOLDEN(adev, stoney_golden_settings_a11);
		GFX8_PROGRAM_GOLDEN(adev, stoney_golden_common_all);
		break;
	default:
		break;
	}
}

#undef GFX8_PROGRAM_GOLDEN

/**
 * gfx_v8_0_scratch_init - set up the scratch register bookkeeping
 * @adev: device to initialise
 *
 * Records 7 scratch registers starting at mmSCRATCH_REG0: slot n is assigned
 * register offset reg_base + n and is marked free.
 */
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	int slot;

	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.num_reg = 7;

	for (slot = 0; slot < adev->gfx.scratch.num_reg; slot++) {
		adev->gfx.scratch.reg[slot] = adev->gfx.scratch.reg_base + slot;
		adev->gfx.scratch.free[slot] = true;
	}
}

734 735 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) 736 { 737 struct amdgpu_device *adev = ring->adev; 738 uint32_t scratch; 739 uint32_t tmp = 0; 740 unsigned i; 741 int r; 742 743 r = amdgpu_gfx_scratch_get(adev, &scratch); 744 if (r) { 745 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); 746 return r; 747 } 748 WREG32(scratch, 0xCAFEDEAD); 749 r = amdgpu_ring_alloc(ring, 3); 750 if (r) { 751 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 752 ring->idx, r); 753 amdgpu_gfx_scratch_free(adev, scratch); 754 return r; 755 } 756 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 757 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 758 amdgpu_ring_write(ring, 0xDEADBEEF); 759 amdgpu_ring_commit(ring); 760 761 for (i = 0; i < adev->usec_timeout; i++) { 762 tmp = RREG32(scratch); 763 if (tmp == 0xDEADBEEF) 764 break; 765 DRM_UDELAY(1); 766 } 767 if (i < adev->usec_timeout) { 768 DRM_INFO("ring test on %d succeeded in %d usecs\n", 769 ring->idx, i); 770 } else { 771 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", 772 ring->idx, scratch, tmp); 773 r = -EINVAL; 774 } 775 amdgpu_gfx_scratch_free(adev, 
scratch); 776 return r; 777 } 778 779 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) 780 { 781 struct amdgpu_device *adev = ring->adev; 782 struct amdgpu_ib ib; 783 struct fence *f = NULL; 784 uint32_t scratch; 785 uint32_t tmp = 0; 786 unsigned i; 787 int r; 788 789 r = amdgpu_gfx_scratch_get(adev, &scratch); 790 if (r) { 791 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r); 792 return r; 793 } 794 WREG32(scratch, 0xCAFEDEAD); 795 memset(&ib, 0, sizeof(ib)); 796 r = amdgpu_ib_get(adev, NULL, 256, &ib); 797 if (r) { 798 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 799 goto err1; 800 } 801 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 802 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); 803 ib.ptr[2] = 0xDEADBEEF; 804 ib.length_dw = 3; 805 806 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); 807 if (r) 808 goto err2; 809 810 r = fence_wait(f, false); 811 if (r) { 812 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 813 goto err2; 814 } 815 for (i = 0; i < adev->usec_timeout; i++) { 816 tmp = RREG32(scratch); 817 if (tmp == 0xDEADBEEF) 818 break; 819 DRM_UDELAY(1); 820 } 821 if (i < adev->usec_timeout) { 822 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", 823 ring->idx, i); 824 goto err2; 825 } else { 826 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", 827 scratch, tmp); 828 r = -EINVAL; 829 } 830 err2: 831 fence_put(f); 832 amdgpu_ib_free(adev, &ib, NULL); 833 fence_put(f); 834 err1: 835 amdgpu_gfx_scratch_free(adev, scratch); 836 return r; 837 } 838 839 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) 840 { 841 const char *chip_name; 842 char fw_name[30]; 843 int err; 844 struct amdgpu_firmware_info *info = NULL; 845 const struct common_firmware_header *header = NULL; 846 const struct gfx_firmware_header_v1_0 *cp_hdr; 847 const struct rlc_firmware_header_v2_0 *rlc_hdr; 848 unsigned int *tmp = NULL, i; 849 850 DRM_DEBUG("\n"); 851 852 switch (adev->asic_type) { 853 case CHIP_TOPAZ: 854 
chip_name = "topaz"; 855 break; 856 case CHIP_TONGA: 857 chip_name = "tonga"; 858 break; 859 case CHIP_CARRIZO: 860 chip_name = "carrizo"; 861 break; 862 case CHIP_FIJI: 863 chip_name = "fiji"; 864 break; 865 case CHIP_POLARIS11: 866 chip_name = "polaris11"; 867 break; 868 case CHIP_POLARIS10: 869 chip_name = "polaris10"; 870 break; 871 case CHIP_STONEY: 872 chip_name = "stoney"; 873 break; 874 default: 875 BUG(); 876 } 877 878 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 879 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 880 if (err) 881 goto out; 882 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 883 if (err) 884 goto out; 885 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 886 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 887 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 888 889 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 890 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 891 if (err) 892 goto out; 893 err = amdgpu_ucode_validate(adev->gfx.me_fw); 894 if (err) 895 goto out; 896 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 897 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 898 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 899 900 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 901 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 902 if (err) 903 goto out; 904 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 905 if (err) 906 goto out; 907 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 908 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 909 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 910 911 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 912 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 913 
if (err) 914 goto out; 915 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 916 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 917 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 918 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 919 920 adev->gfx.rlc.save_and_restore_offset = 921 le32_to_cpu(rlc_hdr->save_and_restore_offset); 922 adev->gfx.rlc.clear_state_descriptor_offset = 923 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 924 adev->gfx.rlc.avail_scratch_ram_locations = 925 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 926 adev->gfx.rlc.reg_restore_list_size = 927 le32_to_cpu(rlc_hdr->reg_restore_list_size); 928 adev->gfx.rlc.reg_list_format_start = 929 le32_to_cpu(rlc_hdr->reg_list_format_start); 930 adev->gfx.rlc.reg_list_format_separate_start = 931 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 932 adev->gfx.rlc.starting_offsets_start = 933 le32_to_cpu(rlc_hdr->starting_offsets_start); 934 adev->gfx.rlc.reg_list_format_size_bytes = 935 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 936 adev->gfx.rlc.reg_list_size_bytes = 937 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 938 939 adev->gfx.rlc.register_list_format = 940 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 941 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 942 943 if (!adev->gfx.rlc.register_list_format) { 944 err = -ENOMEM; 945 goto out; 946 } 947 948 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 949 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 950 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) 951 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 952 953 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 954 955 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 956 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 957 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) 958 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 959 
960 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 961 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 962 if (err) 963 goto out; 964 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 965 if (err) 966 goto out; 967 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 968 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 969 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 970 971 if ((adev->asic_type != CHIP_STONEY) && 972 (adev->asic_type != CHIP_TOPAZ)) { 973 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 974 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 975 if (!err) { 976 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 977 if (err) 978 goto out; 979 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 980 adev->gfx.mec2_fw->data; 981 adev->gfx.mec2_fw_version = 982 le32_to_cpu(cp_hdr->header.ucode_version); 983 adev->gfx.mec2_feature_version = 984 le32_to_cpu(cp_hdr->ucode_feature_version); 985 } else { 986 err = 0; 987 adev->gfx.mec2_fw = NULL; 988 } 989 } 990 991 if (adev->firmware.smu_load) { 992 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 993 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 994 info->fw = adev->gfx.pfp_fw; 995 header = (const struct common_firmware_header *)info->fw->data; 996 adev->firmware.fw_size += 997 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 998 999 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1000 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1001 info->fw = adev->gfx.me_fw; 1002 header = (const struct common_firmware_header *)info->fw->data; 1003 adev->firmware.fw_size += 1004 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1005 1006 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1007 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1008 info->fw = adev->gfx.ce_fw; 1009 header = (const struct common_firmware_header *)info->fw->data; 1010 adev->firmware.fw_size += 1011 
ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1012 1013 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1014 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1015 info->fw = adev->gfx.rlc_fw; 1016 header = (const struct common_firmware_header *)info->fw->data; 1017 adev->firmware.fw_size += 1018 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1019 1020 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1021 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1022 info->fw = adev->gfx.mec_fw; 1023 header = (const struct common_firmware_header *)info->fw->data; 1024 adev->firmware.fw_size += 1025 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1026 1027 if (adev->gfx.mec2_fw) { 1028 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1029 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1030 info->fw = adev->gfx.mec2_fw; 1031 header = (const struct common_firmware_header *)info->fw->data; 1032 adev->firmware.fw_size += 1033 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1034 } 1035 1036 } 1037 1038 out: 1039 if (err) { 1040 dev_err(adev->dev, 1041 "gfx8: Failed to load firmware \"%s\"\n", 1042 fw_name); 1043 release_firmware(adev->gfx.pfp_fw); 1044 adev->gfx.pfp_fw = NULL; 1045 release_firmware(adev->gfx.me_fw); 1046 adev->gfx.me_fw = NULL; 1047 release_firmware(adev->gfx.ce_fw); 1048 adev->gfx.ce_fw = NULL; 1049 release_firmware(adev->gfx.rlc_fw); 1050 adev->gfx.rlc_fw = NULL; 1051 release_firmware(adev->gfx.mec_fw); 1052 adev->gfx.mec_fw = NULL; 1053 release_firmware(adev->gfx.mec2_fw); 1054 adev->gfx.mec2_fw = NULL; 1055 } 1056 return err; 1057 } 1058 1059 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, 1060 volatile u32 *buffer) 1061 { 1062 u32 count = 0, i; 1063 const struct cs_section_def *sect = NULL; 1064 const struct cs_extent_def *ext = NULL; 1065 1066 if (adev->gfx.rlc.cs_data == NULL) 1067 return; 1068 if (buffer == NULL) 1069 return; 1070 1071 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1072 
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1073 1074 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1075 buffer[count++] = cpu_to_le32(0x80000000); 1076 buffer[count++] = cpu_to_le32(0x80000000); 1077 1078 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1079 for (ext = sect->section; ext->extent != NULL; ++ext) { 1080 if (sect->id == SECT_CONTEXT) { 1081 buffer[count++] = 1082 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1083 buffer[count++] = cpu_to_le32(ext->reg_index - 1084 PACKET3_SET_CONTEXT_REG_START); 1085 for (i = 0; i < ext->reg_count; i++) 1086 buffer[count++] = cpu_to_le32(ext->extent[i]); 1087 } else { 1088 return; 1089 } 1090 } 1091 } 1092 1093 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 1094 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - 1095 PACKET3_SET_CONTEXT_REG_START); 1096 switch (adev->asic_type) { 1097 case CHIP_TONGA: 1098 case CHIP_POLARIS10: 1099 buffer[count++] = cpu_to_le32(0x16000012); 1100 buffer[count++] = cpu_to_le32(0x0000002A); 1101 break; 1102 case CHIP_POLARIS11: 1103 buffer[count++] = cpu_to_le32(0x16000012); 1104 buffer[count++] = cpu_to_le32(0x00000000); 1105 break; 1106 case CHIP_FIJI: 1107 buffer[count++] = cpu_to_le32(0x3a00161a); 1108 buffer[count++] = cpu_to_le32(0x0000002e); 1109 break; 1110 case CHIP_TOPAZ: 1111 case CHIP_CARRIZO: 1112 buffer[count++] = cpu_to_le32(0x00000002); 1113 buffer[count++] = cpu_to_le32(0x00000000); 1114 break; 1115 case CHIP_STONEY: 1116 buffer[count++] = cpu_to_le32(0x00000000); 1117 buffer[count++] = cpu_to_le32(0x00000000); 1118 break; 1119 default: 1120 buffer[count++] = cpu_to_le32(0x00000000); 1121 buffer[count++] = cpu_to_le32(0x00000000); 1122 break; 1123 } 1124 1125 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1126 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1127 1128 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 
1129 buffer[count++] = cpu_to_le32(0); 1130 } 1131 1132 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) 1133 { 1134 int r; 1135 1136 /* clear state block */ 1137 if (adev->gfx.rlc.clear_state_obj) { 1138 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 1139 if (unlikely(r != 0)) 1140 dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r); 1141 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); 1142 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1143 1144 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 1145 adev->gfx.rlc.clear_state_obj = NULL; 1146 } 1147 } 1148 1149 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) 1150 { 1151 volatile u32 *dst_ptr; 1152 u32 dws; 1153 const struct cs_section_def *cs_data; 1154 int r; 1155 1156 adev->gfx.rlc.cs_data = vi_cs_data; 1157 1158 cs_data = adev->gfx.rlc.cs_data; 1159 1160 if (cs_data) { 1161 /* clear state block */ 1162 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); 1163 1164 if (adev->gfx.rlc.clear_state_obj == NULL) { 1165 r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, 1166 AMDGPU_GEM_DOMAIN_VRAM, 1167 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, 1168 NULL, NULL, 1169 &adev->gfx.rlc.clear_state_obj); 1170 if (r) { 1171 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); 1172 gfx_v8_0_rlc_fini(adev); 1173 return r; 1174 } 1175 } 1176 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 1177 if (unlikely(r != 0)) { 1178 gfx_v8_0_rlc_fini(adev); 1179 return r; 1180 } 1181 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM, 1182 &adev->gfx.rlc.clear_state_gpu_addr); 1183 if (r) { 1184 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1185 dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r); 1186 gfx_v8_0_rlc_fini(adev); 1187 return r; 1188 } 1189 1190 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr); 1191 if (r) { 1192 dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r); 1193 gfx_v8_0_rlc_fini(adev); 
1194 return r; 1195 } 1196 /* set up the cs buffer */ 1197 dst_ptr = adev->gfx.rlc.cs_ptr; 1198 gfx_v8_0_get_csb_buffer(adev, dst_ptr); 1199 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); 1200 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1201 } 1202 1203 return 0; 1204 } 1205 1206 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) 1207 { 1208 int r; 1209 1210 if (adev->gfx.mec.hpd_eop_obj) { 1211 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); 1212 if (unlikely(r != 0)) 1213 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); 1214 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); 1215 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1216 1217 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); 1218 adev->gfx.mec.hpd_eop_obj = NULL; 1219 } 1220 } 1221 1222 #define MEC_HPD_SIZE 2048 1223 1224 static int gfx_v8_0_mec_init(struct amdgpu_device *adev) 1225 { 1226 int r; 1227 u32 *hpd; 1228 1229 /* 1230 * we assign only 1 pipe because all other pipes will 1231 * be handled by KFD 1232 */ 1233 adev->gfx.mec.num_mec = 1; 1234 adev->gfx.mec.num_pipe = 1; 1235 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; 1236 1237 if (adev->gfx.mec.hpd_eop_obj == NULL) { 1238 r = amdgpu_bo_create(adev, 1239 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, 1240 PAGE_SIZE, true, 1241 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, 1242 &adev->gfx.mec.hpd_eop_obj); 1243 if (r) { 1244 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1245 return r; 1246 } 1247 } 1248 1249 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); 1250 if (unlikely(r != 0)) { 1251 gfx_v8_0_mec_fini(adev); 1252 return r; 1253 } 1254 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT, 1255 &adev->gfx.mec.hpd_eop_gpu_addr); 1256 if (r) { 1257 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r); 1258 gfx_v8_0_mec_fini(adev); 1259 return r; 1260 } 1261 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); 1262 if (r) { 1263 
dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); 1264 gfx_v8_0_mec_fini(adev); 1265 return r; 1266 } 1267 1268 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2); 1269 1270 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1271 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1272 1273 return 0; 1274 } 1275 1276 static const u32 vgpr_init_compute_shader[] = 1277 { 1278 0x7e000209, 0x7e020208, 1279 0x7e040207, 0x7e060206, 1280 0x7e080205, 0x7e0a0204, 1281 0x7e0c0203, 0x7e0e0202, 1282 0x7e100201, 0x7e120200, 1283 0x7e140209, 0x7e160208, 1284 0x7e180207, 0x7e1a0206, 1285 0x7e1c0205, 0x7e1e0204, 1286 0x7e200203, 0x7e220202, 1287 0x7e240201, 0x7e260200, 1288 0x7e280209, 0x7e2a0208, 1289 0x7e2c0207, 0x7e2e0206, 1290 0x7e300205, 0x7e320204, 1291 0x7e340203, 0x7e360202, 1292 0x7e380201, 0x7e3a0200, 1293 0x7e3c0209, 0x7e3e0208, 1294 0x7e400207, 0x7e420206, 1295 0x7e440205, 0x7e460204, 1296 0x7e480203, 0x7e4a0202, 1297 0x7e4c0201, 0x7e4e0200, 1298 0x7e500209, 0x7e520208, 1299 0x7e540207, 0x7e560206, 1300 0x7e580205, 0x7e5a0204, 1301 0x7e5c0203, 0x7e5e0202, 1302 0x7e600201, 0x7e620200, 1303 0x7e640209, 0x7e660208, 1304 0x7e680207, 0x7e6a0206, 1305 0x7e6c0205, 0x7e6e0204, 1306 0x7e700203, 0x7e720202, 1307 0x7e740201, 0x7e760200, 1308 0x7e780209, 0x7e7a0208, 1309 0x7e7c0207, 0x7e7e0206, 1310 0xbf8a0000, 0xbf810000, 1311 }; 1312 1313 static const u32 sgpr_init_compute_shader[] = 1314 { 1315 0xbe8a0100, 0xbe8c0102, 1316 0xbe8e0104, 0xbe900106, 1317 0xbe920108, 0xbe940100, 1318 0xbe960102, 0xbe980104, 1319 0xbe9a0106, 0xbe9c0108, 1320 0xbe9e0100, 0xbea00102, 1321 0xbea20104, 0xbea40106, 1322 0xbea60108, 0xbea80100, 1323 0xbeaa0102, 0xbeac0104, 1324 0xbeae0106, 0xbeb00108, 1325 0xbeb20100, 0xbeb40102, 1326 0xbeb60104, 0xbeb80106, 1327 0xbeba0108, 0xbebc0100, 1328 0xbebe0102, 0xbec00104, 1329 0xbec20106, 0xbec40108, 1330 0xbec60100, 0xbec80102, 1331 0xbee60004, 0xbee70005, 1332 0xbeea0006, 0xbeeb0007, 1333 0xbee80008, 0xbee90009, 1334 0xbefc0000, 
/*
 * Register state for the VGPR dispatch in gfx_v8_0_do_edc_gpr_workarounds().
 * The table is a flat list of (register, value) pairs; the consumer walks it
 * two entries at a time and emits one SET_SH_REG packet per pair.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/*
 * Register state for the first SGPR dispatch ("SGPR1") in
 * gfx_v8_0_do_edc_gpr_workarounds(); same (register, value) pair layout as
 * vgpr_init_regs.  Differs from the sgpr2 table only in the
 * COMPUTE_STATIC_THREAD_MGMT_SE0 mask (0x0f here).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1394 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1395 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1396 }; 1397 1398 static const u32 sec_ded_counter_registers[] = 1399 { 1400 mmCPC_EDC_ATC_CNT, 1401 mmCPC_EDC_SCRATCH_CNT, 1402 mmCPC_EDC_UCODE_CNT, 1403 mmCPF_EDC_ATC_CNT, 1404 mmCPF_EDC_ROQ_CNT, 1405 mmCPF_EDC_TAG_CNT, 1406 mmCPG_EDC_ATC_CNT, 1407 mmCPG_EDC_DMA_CNT, 1408 mmCPG_EDC_TAG_CNT, 1409 mmDC_EDC_CSINVOC_CNT, 1410 mmDC_EDC_RESTORE_CNT, 1411 mmDC_EDC_STATE_CNT, 1412 mmGDS_EDC_CNT, 1413 mmGDS_EDC_GRBM_CNT, 1414 mmGDS_EDC_OA_DED, 1415 mmSPI_EDC_CNT, 1416 mmSQC_ATC_EDC_GATCL1_CNT, 1417 mmSQC_EDC_CNT, 1418 mmSQ_EDC_DED_CNT, 1419 mmSQ_EDC_INFO, 1420 mmSQ_EDC_SEC_CNT, 1421 mmTCC_EDC_CNT, 1422 mmTCP_ATC_EDC_GATCL1_CNT, 1423 mmTCP_EDC_CNT, 1424 mmTD_EDC_CNT 1425 }; 1426 1427 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 1428 { 1429 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 1430 struct amdgpu_ib ib; 1431 struct fence *f = NULL; 1432 int r, i; 1433 u32 tmp; 1434 unsigned total_size, vgpr_offset, sgpr_offset; 1435 u64 gpu_addr; 1436 1437 /* only supported on CZ */ 1438 if (adev->asic_type != CHIP_CARRIZO) 1439 return 0; 1440 1441 /* bail if the compute ring is not ready */ 1442 if (!ring->ready) 1443 return 0; 1444 1445 tmp = RREG32(mmGB_EDC_MODE); 1446 WREG32(mmGB_EDC_MODE, 0); 1447 1448 total_size = 1449 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1450 total_size += 1451 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1452 total_size += 1453 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1454 total_size = ALIGN(total_size, 256); 1455 vgpr_offset = total_size; 1456 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 1457 sgpr_offset = total_size; 1458 total_size += sizeof(sgpr_init_compute_shader); 1459 1460 /* allocate an indirect buffer to put the commands in */ 1461 memset(&ib, 0, sizeof(ib)); 1462 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 1463 if (r) { 1464 DRM_ERROR("amdgpu: failed to get ib 
(%d).\n", r); 1465 return r; 1466 } 1467 1468 /* load the compute shaders */ 1469 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 1470 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 1471 1472 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 1473 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 1474 1475 /* init the ib length to 0 */ 1476 ib.length_dw = 0; 1477 1478 /* VGPR */ 1479 /* write the register state for the compute dispatch */ 1480 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) { 1481 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1482 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START; 1483 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1]; 1484 } 1485 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1486 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 1487 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1488 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1489 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1490 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1491 1492 /* write dispatch packet */ 1493 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1494 ib.ptr[ib.length_dw++] = 8; /* x */ 1495 ib.ptr[ib.length_dw++] = 1; /* y */ 1496 ib.ptr[ib.length_dw++] = 1; /* z */ 1497 ib.ptr[ib.length_dw++] = 1498 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1499 1500 /* write CS partial flush packet */ 1501 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1502 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1503 1504 /* SGPR1 */ 1505 /* write the register state for the compute dispatch */ 1506 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) { 1507 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1508 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START; 1509 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1]; 1510 } 1511 /* write the shader start 
address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1512 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1513 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1514 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1515 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1516 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1517 1518 /* write dispatch packet */ 1519 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1520 ib.ptr[ib.length_dw++] = 8; /* x */ 1521 ib.ptr[ib.length_dw++] = 1; /* y */ 1522 ib.ptr[ib.length_dw++] = 1; /* z */ 1523 ib.ptr[ib.length_dw++] = 1524 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1525 1526 /* write CS partial flush packet */ 1527 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1528 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1529 1530 /* SGPR2 */ 1531 /* write the register state for the compute dispatch */ 1532 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) { 1533 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1534 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START; 1535 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1]; 1536 } 1537 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1538 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1539 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1540 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1541 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1542 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1543 1544 /* write dispatch packet */ 1545 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1546 ib.ptr[ib.length_dw++] = 8; /* x */ 1547 ib.ptr[ib.length_dw++] = 1; /* y */ 1548 ib.ptr[ib.length_dw++] = 1; /* z */ 1549 ib.ptr[ib.length_dw++] = 1550 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1551 1552 /* write CS partial flush packet */ 1553 ib.ptr[ib.length_dw++] = 
PACKET3(PACKET3_EVENT_WRITE, 0); 1554 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1555 1556 /* shedule the ib on the ring */ 1557 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); 1558 if (r) { 1559 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 1560 goto fail; 1561 } 1562 1563 /* wait for the GPU to finish processing the IB */ 1564 r = fence_wait(f, false); 1565 if (r) { 1566 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 1567 goto fail; 1568 } 1569 1570 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2); 1571 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1); 1572 WREG32(mmGB_EDC_MODE, tmp); 1573 1574 tmp = RREG32(mmCC_GC_EDC_CONFIG); 1575 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1; 1576 WREG32(mmCC_GC_EDC_CONFIG, tmp); 1577 1578 1579 /* read back registers to clear the counters */ 1580 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) 1581 RREG32(sec_ded_counter_registers[i]); 1582 1583 fail: 1584 fence_put(f); 1585 amdgpu_ib_free(adev, &ib, NULL); 1586 fence_put(f); 1587 1588 return r; 1589 } 1590 1591 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) 1592 { 1593 u32 gb_addr_config; 1594 u32 mc_shared_chmap, mc_arb_ramcfg; 1595 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; 1596 u32 tmp; 1597 int ret; 1598 1599 switch (adev->asic_type) { 1600 case CHIP_TOPAZ: 1601 adev->gfx.config.max_shader_engines = 1; 1602 adev->gfx.config.max_tile_pipes = 2; 1603 adev->gfx.config.max_cu_per_sh = 6; 1604 adev->gfx.config.max_sh_per_se = 1; 1605 adev->gfx.config.max_backends_per_se = 2; 1606 adev->gfx.config.max_texture_channel_caches = 2; 1607 adev->gfx.config.max_gprs = 256; 1608 adev->gfx.config.max_gs_threads = 32; 1609 adev->gfx.config.max_hw_contexts = 8; 1610 1611 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1612 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1613 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1614 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1615 
gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN; 1616 break; 1617 case CHIP_FIJI: 1618 adev->gfx.config.max_shader_engines = 4; 1619 adev->gfx.config.max_tile_pipes = 16; 1620 adev->gfx.config.max_cu_per_sh = 16; 1621 adev->gfx.config.max_sh_per_se = 1; 1622 adev->gfx.config.max_backends_per_se = 4; 1623 adev->gfx.config.max_texture_channel_caches = 16; 1624 adev->gfx.config.max_gprs = 256; 1625 adev->gfx.config.max_gs_threads = 32; 1626 adev->gfx.config.max_hw_contexts = 8; 1627 1628 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1629 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1630 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1631 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1632 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1633 break; 1634 case CHIP_POLARIS11: 1635 ret = amdgpu_atombios_get_gfx_info(adev); 1636 if (ret) 1637 return ret; 1638 adev->gfx.config.max_gprs = 256; 1639 adev->gfx.config.max_gs_threads = 32; 1640 adev->gfx.config.max_hw_contexts = 8; 1641 1642 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1643 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1644 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1645 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1646 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; 1647 break; 1648 case CHIP_POLARIS10: 1649 ret = amdgpu_atombios_get_gfx_info(adev); 1650 if (ret) 1651 return ret; 1652 adev->gfx.config.max_gprs = 256; 1653 adev->gfx.config.max_gs_threads = 32; 1654 adev->gfx.config.max_hw_contexts = 8; 1655 1656 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1657 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1658 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1659 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1660 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1661 break; 1662 case CHIP_TONGA: 1663 adev->gfx.config.max_shader_engines = 4; 1664 adev->gfx.config.max_tile_pipes = 8; 1665 adev->gfx.config.max_cu_per_sh = 8; 1666 adev->gfx.config.max_sh_per_se = 1; 
1667 adev->gfx.config.max_backends_per_se = 2; 1668 adev->gfx.config.max_texture_channel_caches = 8; 1669 adev->gfx.config.max_gprs = 256; 1670 adev->gfx.config.max_gs_threads = 32; 1671 adev->gfx.config.max_hw_contexts = 8; 1672 1673 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1674 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1675 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1676 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1677 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1678 break; 1679 case CHIP_CARRIZO: 1680 adev->gfx.config.max_shader_engines = 1; 1681 adev->gfx.config.max_tile_pipes = 2; 1682 adev->gfx.config.max_sh_per_se = 1; 1683 adev->gfx.config.max_backends_per_se = 2; 1684 1685 switch (adev->pdev->revision) { 1686 case 0xc4: 1687 case 0x84: 1688 case 0xc8: 1689 case 0xcc: 1690 case 0xe1: 1691 case 0xe3: 1692 /* B10 */ 1693 adev->gfx.config.max_cu_per_sh = 8; 1694 break; 1695 case 0xc5: 1696 case 0x81: 1697 case 0x85: 1698 case 0xc9: 1699 case 0xcd: 1700 case 0xe2: 1701 case 0xe4: 1702 /* B8 */ 1703 adev->gfx.config.max_cu_per_sh = 6; 1704 break; 1705 case 0xc6: 1706 case 0xca: 1707 case 0xce: 1708 case 0x88: 1709 /* B6 */ 1710 adev->gfx.config.max_cu_per_sh = 6; 1711 break; 1712 case 0xc7: 1713 case 0x87: 1714 case 0xcb: 1715 case 0xe5: 1716 case 0x89: 1717 default: 1718 /* B4 */ 1719 adev->gfx.config.max_cu_per_sh = 4; 1720 break; 1721 } 1722 1723 adev->gfx.config.max_texture_channel_caches = 2; 1724 adev->gfx.config.max_gprs = 256; 1725 adev->gfx.config.max_gs_threads = 32; 1726 adev->gfx.config.max_hw_contexts = 8; 1727 1728 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1729 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1730 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1731 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1732 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1733 break; 1734 case CHIP_STONEY: 1735 adev->gfx.config.max_shader_engines = 1; 1736 adev->gfx.config.max_tile_pipes = 2; 1737 
adev->gfx.config.max_sh_per_se = 1; 1738 adev->gfx.config.max_backends_per_se = 1; 1739 1740 switch (adev->pdev->revision) { 1741 case 0xc0: 1742 case 0xc1: 1743 case 0xc2: 1744 case 0xc4: 1745 case 0xc8: 1746 case 0xc9: 1747 adev->gfx.config.max_cu_per_sh = 3; 1748 break; 1749 case 0xd0: 1750 case 0xd1: 1751 case 0xd2: 1752 default: 1753 adev->gfx.config.max_cu_per_sh = 2; 1754 break; 1755 } 1756 1757 adev->gfx.config.max_texture_channel_caches = 2; 1758 adev->gfx.config.max_gprs = 256; 1759 adev->gfx.config.max_gs_threads = 16; 1760 adev->gfx.config.max_hw_contexts = 8; 1761 1762 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1763 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1764 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1765 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1766 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1767 break; 1768 default: 1769 adev->gfx.config.max_shader_engines = 2; 1770 adev->gfx.config.max_tile_pipes = 4; 1771 adev->gfx.config.max_cu_per_sh = 2; 1772 adev->gfx.config.max_sh_per_se = 1; 1773 adev->gfx.config.max_backends_per_se = 2; 1774 adev->gfx.config.max_texture_channel_caches = 4; 1775 adev->gfx.config.max_gprs = 256; 1776 adev->gfx.config.max_gs_threads = 32; 1777 adev->gfx.config.max_hw_contexts = 8; 1778 1779 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1780 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1781 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1782 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1783 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1784 break; 1785 } 1786 1787 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); 1788 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); 1789 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; 1790 1791 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; 1792 adev->gfx.config.mem_max_burst_length_bytes = 256; 1793 if (adev->flags & AMD_IS_APU) { 1794 /* Get memory bank mapping mode. 
*/ 1795 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING); 1796 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1797 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1798 1799 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING); 1800 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1801 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1802 1803 /* Validate settings in case only one DIMM installed. */ 1804 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12)) 1805 dimm00_addr_map = 0; 1806 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12)) 1807 dimm01_addr_map = 0; 1808 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12)) 1809 dimm10_addr_map = 0; 1810 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12)) 1811 dimm11_addr_map = 0; 1812 1813 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */ 1814 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. 
*/ 1815 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) 1816 adev->gfx.config.mem_row_size_in_kb = 2; 1817 else 1818 adev->gfx.config.mem_row_size_in_kb = 1; 1819 } else { 1820 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS); 1821 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 1822 if (adev->gfx.config.mem_row_size_in_kb > 4) 1823 adev->gfx.config.mem_row_size_in_kb = 4; 1824 } 1825 1826 adev->gfx.config.shader_engine_tile_size = 32; 1827 adev->gfx.config.num_gpus = 1; 1828 adev->gfx.config.multi_gpu_tile_size = 64; 1829 1830 /* fix up row size */ 1831 switch (adev->gfx.config.mem_row_size_in_kb) { 1832 case 1: 1833 default: 1834 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0); 1835 break; 1836 case 2: 1837 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1); 1838 break; 1839 case 4: 1840 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2); 1841 break; 1842 } 1843 adev->gfx.config.gb_addr_config = gb_addr_config; 1844 1845 return 0; 1846 } 1847 1848 static int gfx_v8_0_sw_init(void *handle) 1849 { 1850 int i, r; 1851 struct amdgpu_ring *ring; 1852 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1853 1854 /* EOP Event */ 1855 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq); 1856 if (r) 1857 return r; 1858 1859 /* Privileged reg */ 1860 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq); 1861 if (r) 1862 return r; 1863 1864 /* Privileged inst */ 1865 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq); 1866 if (r) 1867 return r; 1868 1869 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1870 1871 gfx_v8_0_scratch_init(adev); 1872 1873 r = gfx_v8_0_init_microcode(adev); 1874 if (r) { 1875 DRM_ERROR("Failed to load gfx firmware!\n"); 1876 return r; 1877 } 1878 1879 r = gfx_v8_0_rlc_init(adev); 1880 if (r) { 1881 DRM_ERROR("Failed to init rlc BOs!\n"); 1882 return 
r; 1883 } 1884 1885 r = gfx_v8_0_mec_init(adev); 1886 if (r) { 1887 DRM_ERROR("Failed to init MEC BOs!\n"); 1888 return r; 1889 } 1890 1891 /* set up the gfx ring */ 1892 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 1893 ring = &adev->gfx.gfx_ring[i]; 1894 ring->ring_obj = NULL; 1895 sprintf(ring->name, "gfx"); 1896 /* no gfx doorbells on iceland */ 1897 if (adev->asic_type != CHIP_TOPAZ) { 1898 ring->use_doorbell = true; 1899 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; 1900 } 1901 1902 r = amdgpu_ring_init(adev, ring, 1024, 1903 PACKET3(PACKET3_NOP, 0x3FFF), 0xf, 1904 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP, 1905 AMDGPU_RING_TYPE_GFX); 1906 if (r) 1907 return r; 1908 } 1909 1910 /* set up the compute queues */ 1911 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 1912 unsigned irq_type; 1913 1914 /* max 32 queues per MEC */ 1915 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { 1916 DRM_ERROR("Too many (%d) compute rings!\n", i); 1917 break; 1918 } 1919 ring = &adev->gfx.compute_ring[i]; 1920 ring->ring_obj = NULL; 1921 ring->use_doorbell = true; 1922 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; 1923 ring->me = 1; /* first MEC */ 1924 ring->pipe = i / 8; 1925 ring->queue = i % 8; 1926 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1927 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; 1928 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1929 r = amdgpu_ring_init(adev, ring, 1024, 1930 PACKET3(PACKET3_NOP, 0x3FFF), 0xf, 1931 &adev->gfx.eop_irq, irq_type, 1932 AMDGPU_RING_TYPE_COMPUTE); 1933 if (r) 1934 return r; 1935 } 1936 1937 /* reserve GDS, GWS and OA resource for gfx */ 1938 r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size, 1939 PAGE_SIZE, true, 1940 AMDGPU_GEM_DOMAIN_GDS, 0, NULL, 1941 NULL, &adev->gds.gds_gfx_bo); 1942 if (r) 1943 return r; 1944 1945 r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size, 1946 PAGE_SIZE, true, 1947 AMDGPU_GEM_DOMAIN_GWS, 0, NULL, 
1948 NULL, &adev->gds.gws_gfx_bo); 1949 if (r) 1950 return r; 1951 1952 r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size, 1953 PAGE_SIZE, true, 1954 AMDGPU_GEM_DOMAIN_OA, 0, NULL, 1955 NULL, &adev->gds.oa_gfx_bo); 1956 if (r) 1957 return r; 1958 1959 adev->gfx.ce_ram_size = 0x8000; 1960 1961 r = gfx_v8_0_gpu_early_init(adev); 1962 if (r) 1963 return r; 1964 1965 return 0; 1966 } 1967 1968 static int gfx_v8_0_sw_fini(void *handle) 1969 { 1970 int i; 1971 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1972 1973 amdgpu_bo_unref(&adev->gds.oa_gfx_bo); 1974 amdgpu_bo_unref(&adev->gds.gws_gfx_bo); 1975 amdgpu_bo_unref(&adev->gds.gds_gfx_bo); 1976 1977 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1978 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1979 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1980 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1981 1982 gfx_v8_0_mec_fini(adev); 1983 1984 gfx_v8_0_rlc_fini(adev); 1985 1986 kfree(adev->gfx.rlc.register_list_format); 1987 1988 return 0; 1989 } 1990 1991 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) 1992 { 1993 uint32_t *modearray, *mod2array; 1994 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); 1995 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 1996 u32 reg_offset; 1997 1998 modearray = adev->gfx.config.tile_mode_array; 1999 mod2array = adev->gfx.config.macrotile_mode_array; 2000 2001 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2002 modearray[reg_offset] = 0; 2003 2004 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2005 mod2array[reg_offset] = 0; 2006 2007 switch (adev->asic_type) { 2008 case CHIP_TOPAZ: 2009 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2010 PIPE_CONFIG(ADDR_SURF_P2) | 2011 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2012 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2013 modearray[1] = 
(ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2014 PIPE_CONFIG(ADDR_SURF_P2) | 2015 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2016 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2017 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2018 PIPE_CONFIG(ADDR_SURF_P2) | 2019 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2020 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2021 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2022 PIPE_CONFIG(ADDR_SURF_P2) | 2023 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2024 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2025 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2026 PIPE_CONFIG(ADDR_SURF_P2) | 2027 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2028 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2029 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2030 PIPE_CONFIG(ADDR_SURF_P2) | 2031 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2032 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2033 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2034 PIPE_CONFIG(ADDR_SURF_P2) | 2035 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2036 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2037 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2038 PIPE_CONFIG(ADDR_SURF_P2)); 2039 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2040 PIPE_CONFIG(ADDR_SURF_P2) | 2041 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2042 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2043 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2044 PIPE_CONFIG(ADDR_SURF_P2) | 2045 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2046 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2047 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2048 PIPE_CONFIG(ADDR_SURF_P2) | 2049 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2050 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2051 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2052 PIPE_CONFIG(ADDR_SURF_P2) | 2053 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2054 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2055 
modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2056 PIPE_CONFIG(ADDR_SURF_P2) | 2057 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2058 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2059 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2060 PIPE_CONFIG(ADDR_SURF_P2) | 2061 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2062 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2063 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2064 PIPE_CONFIG(ADDR_SURF_P2) | 2065 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2066 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2067 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2068 PIPE_CONFIG(ADDR_SURF_P2) | 2069 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2071 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2072 PIPE_CONFIG(ADDR_SURF_P2) | 2073 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2074 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2075 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2076 PIPE_CONFIG(ADDR_SURF_P2) | 2077 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2079 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2080 PIPE_CONFIG(ADDR_SURF_P2) | 2081 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2083 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2084 PIPE_CONFIG(ADDR_SURF_P2) | 2085 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2087 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2088 PIPE_CONFIG(ADDR_SURF_P2) | 2089 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2090 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2091 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2092 PIPE_CONFIG(ADDR_SURF_P2) | 2093 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2095 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2096 
PIPE_CONFIG(ADDR_SURF_P2) | 2097 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2099 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2100 PIPE_CONFIG(ADDR_SURF_P2) | 2101 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2102 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2103 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2104 PIPE_CONFIG(ADDR_SURF_P2) | 2105 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2106 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2107 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2108 PIPE_CONFIG(ADDR_SURF_P2) | 2109 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2111 2112 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2113 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2114 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2115 NUM_BANKS(ADDR_SURF_8_BANK)); 2116 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2117 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2118 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2119 NUM_BANKS(ADDR_SURF_8_BANK)); 2120 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2121 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2122 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2123 NUM_BANKS(ADDR_SURF_8_BANK)); 2124 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2125 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2126 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2127 NUM_BANKS(ADDR_SURF_8_BANK)); 2128 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2129 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2130 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2131 NUM_BANKS(ADDR_SURF_8_BANK)); 2132 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2133 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2134 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2135 NUM_BANKS(ADDR_SURF_8_BANK)); 2136 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2137 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2138 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2139 NUM_BANKS(ADDR_SURF_8_BANK)); 2140 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2141 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2142 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2143 NUM_BANKS(ADDR_SURF_16_BANK)); 2144 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2145 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2146 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2147 NUM_BANKS(ADDR_SURF_16_BANK)); 2148 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2149 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2150 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2151 NUM_BANKS(ADDR_SURF_16_BANK)); 2152 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2153 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2154 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2155 NUM_BANKS(ADDR_SURF_16_BANK)); 2156 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2157 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2158 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2159 NUM_BANKS(ADDR_SURF_16_BANK)); 2160 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2163 NUM_BANKS(ADDR_SURF_16_BANK)); 2164 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2167 NUM_BANKS(ADDR_SURF_8_BANK)); 2168 2169 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2170 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2171 reg_offset != 23) 2172 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2173 2174 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2175 if (reg_offset != 7) 2176 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2177 2178 break; 2179 case CHIP_FIJI: 2180 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2181 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2182 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2183 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2184 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2185 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2186 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2187 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2188 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2189 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2190 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2191 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2192 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2193 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2194 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2195 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2196 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2197 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2198 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2199 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2200 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2201 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2202 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2203 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2204 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2205 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2206 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2207 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2208 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2209 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2210 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2211 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2212 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2213 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2214 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2215 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2216 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2217 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2218 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2219 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2220 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2221 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2222 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2223 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2224 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2225 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2226 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2227 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2228 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2229 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2230 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2231 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2232 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2233 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2234 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2235 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2236 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2237 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2238 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2239 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2240 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2241 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2242 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2243 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2244 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2246 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2247 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2248 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2249 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2250 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2251 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2252 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2254 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2255 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2256 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2257 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2258 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2259 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2260 
MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2262 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2263 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2264 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2266 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2267 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2268 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2270 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2271 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2272 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2274 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2275 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2276 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2278 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2279 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2280 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2282 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2283 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2284 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2286 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2287 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2288 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2290 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2291 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2292 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2294 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2295 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2296 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2298 modearray[30] = 
(ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2299 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2300 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2302 2303 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2306 NUM_BANKS(ADDR_SURF_8_BANK)); 2307 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2308 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2309 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2310 NUM_BANKS(ADDR_SURF_8_BANK)); 2311 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2312 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2313 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2314 NUM_BANKS(ADDR_SURF_8_BANK)); 2315 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2316 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2317 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2318 NUM_BANKS(ADDR_SURF_8_BANK)); 2319 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2320 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2321 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2322 NUM_BANKS(ADDR_SURF_8_BANK)); 2323 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2324 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2325 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2326 NUM_BANKS(ADDR_SURF_8_BANK)); 2327 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2328 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2329 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2330 NUM_BANKS(ADDR_SURF_8_BANK)); 2331 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2332 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2333 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2334 NUM_BANKS(ADDR_SURF_8_BANK)); 2335 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2336 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2337 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2338 NUM_BANKS(ADDR_SURF_8_BANK)); 2339 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2341 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2342 NUM_BANKS(ADDR_SURF_8_BANK)); 2343 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2346 NUM_BANKS(ADDR_SURF_8_BANK)); 2347 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2350 NUM_BANKS(ADDR_SURF_8_BANK)); 2351 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2354 NUM_BANKS(ADDR_SURF_8_BANK)); 2355 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2358 NUM_BANKS(ADDR_SURF_4_BANK)); 2359 2360 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2361 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2362 2363 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2364 if (reg_offset != 7) 2365 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2366 2367 break; 2368 case CHIP_TONGA: 2369 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2370 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2371 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2372 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2373 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2374 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2375 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2376 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2377 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2378 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2379 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2380 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2381 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2382 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2383 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2384 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2385 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2386 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2387 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2388 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2389 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2390 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2391 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2392 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2393 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2394 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2395 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2396 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2397 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2398 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2399 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2400 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2401 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2402 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2403 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2404 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2405 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2406 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2407 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2408 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2409 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2410 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2411 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2412 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2413 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2415 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2416 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2417 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2418 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2419 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2420 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2421 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2423 
modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2424 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2425 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2427 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2428 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2429 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2431 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2432 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2433 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2435 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2436 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2437 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2439 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2440 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2441 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2443 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2444 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2445 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2446 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2447 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2448 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2449 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2450 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2451 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2452 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2453 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2455 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2456 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2457 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2459 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2460 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2461 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2462 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2463 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2464 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2465 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2467 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2468 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2469 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2470 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2471 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2472 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2473 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2474 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2475 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2476 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2477 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2478 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2479 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2480 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2481 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2483 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2484 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2485 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2487 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2488 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2489 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2491 2492 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2493 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2494 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2495 NUM_BANKS(ADDR_SURF_16_BANK)); 2496 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2499 NUM_BANKS(ADDR_SURF_16_BANK)); 2500 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2501 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2502 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2503 NUM_BANKS(ADDR_SURF_16_BANK)); 2504 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2505 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2506 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2507 NUM_BANKS(ADDR_SURF_16_BANK)); 2508 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2511 NUM_BANKS(ADDR_SURF_16_BANK)); 2512 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2513 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2514 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2515 NUM_BANKS(ADDR_SURF_16_BANK)); 2516 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2517 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2518 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2519 NUM_BANKS(ADDR_SURF_16_BANK)); 2520 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2523 NUM_BANKS(ADDR_SURF_16_BANK)); 2524 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2525 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2526 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2527 NUM_BANKS(ADDR_SURF_16_BANK)); 2528 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2531 NUM_BANKS(ADDR_SURF_16_BANK)); 2532 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2535 NUM_BANKS(ADDR_SURF_16_BANK)); 2536 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2537 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2538 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2539 NUM_BANKS(ADDR_SURF_8_BANK)); 2540 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2541 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2542 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2543 NUM_BANKS(ADDR_SURF_4_BANK)); 2544 mod2array[14] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2545 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2546 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2547 NUM_BANKS(ADDR_SURF_4_BANK)); 2548 2549 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2550 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2551 2552 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2553 if (reg_offset != 7) 2554 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2555 2556 break; 2557 case CHIP_POLARIS11: 2558 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2559 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2560 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2561 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2562 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2563 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2564 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2565 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2566 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2567 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2568 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2569 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2570 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2571 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2572 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2573 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2574 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2575 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2576 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2577 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2578 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2579 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2580 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2581 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2582 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2583 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2584 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2585 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2586 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2587 
PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2588 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2589 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2590 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2591 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2592 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2593 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2594 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2595 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2596 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2597 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2598 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2599 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2600 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2601 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2602 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2603 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2604 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2605 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2606 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2607 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2608 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2609 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2610 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2611 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2612 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2613 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2614 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2615 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2616 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2617 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2618 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2619 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2620 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2621 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2622 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2623 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2624 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2625 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2626 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2627 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2628 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2629 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2630 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2631 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2632 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2633 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2634 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2635 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2636 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2637 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2638 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2639 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2640 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2641 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2642 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2643 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2644 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2645 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2646 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2647 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2648 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2649 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2650 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2651 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2652 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2653 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2654 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2655 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2656 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2657 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2658 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2659 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2660 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2661 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2662 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2663 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2664 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2665 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2666 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2667 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2668 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2669 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2670 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2671 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2672 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2673 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2674 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2675 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2676 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2677 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2678 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2679 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2680 2681 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2682 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2683 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2684 NUM_BANKS(ADDR_SURF_16_BANK)); 2685 2686 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2687 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2688 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2689 NUM_BANKS(ADDR_SURF_16_BANK)); 2690 2691 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2692 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2693 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2694 NUM_BANKS(ADDR_SURF_16_BANK)); 2695 2696 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2697 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2698 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2699 NUM_BANKS(ADDR_SURF_16_BANK)); 2700 2701 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2702 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2703 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2704 NUM_BANKS(ADDR_SURF_16_BANK)); 2705 2706 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2707 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2708 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2709 NUM_BANKS(ADDR_SURF_16_BANK)); 2710 2711 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2712 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2713 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2714 
NUM_BANKS(ADDR_SURF_16_BANK)); 2715 2716 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2717 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2718 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2719 NUM_BANKS(ADDR_SURF_16_BANK)); 2720 2721 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2722 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2723 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2724 NUM_BANKS(ADDR_SURF_16_BANK)); 2725 2726 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2727 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2728 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2729 NUM_BANKS(ADDR_SURF_16_BANK)); 2730 2731 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2732 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2733 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2734 NUM_BANKS(ADDR_SURF_16_BANK)); 2735 2736 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2737 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2738 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2739 NUM_BANKS(ADDR_SURF_16_BANK)); 2740 2741 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2742 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2743 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2744 NUM_BANKS(ADDR_SURF_8_BANK)); 2745 2746 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2747 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2748 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2749 NUM_BANKS(ADDR_SURF_4_BANK)); 2750 2751 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2752 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2753 2754 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2755 if (reg_offset != 7) 2756 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2757 2758 break; 2759 case CHIP_POLARIS10: 2760 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2761 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2762 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2763 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2764 modearray[1] = 
(ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2765 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2766 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2767 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2768 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2769 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2770 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2771 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2772 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2773 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2774 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2775 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2776 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2777 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2778 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2779 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2780 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2781 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2782 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2783 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2784 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2785 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2786 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2787 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2788 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2789 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2790 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2791 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2792 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2793 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2794 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2795 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2796 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2797 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2798 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2799 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2800 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2801 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2802 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2803 
PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2804 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2805 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2806 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2807 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2808 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2809 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2810 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2811 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2812 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2813 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2814 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2815 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2816 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2817 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2818 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2819 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2820 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2821 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2822 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2823 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2824 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2825 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2826 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2827 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2828 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2829 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2830 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2831 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2832 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2833 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2834 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2835 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2836 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2837 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2838 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2839 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2840 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2841 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2842 modearray[21] = 
(ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2843 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2844 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2845 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2846 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2847 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2848 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2849 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2850 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2851 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2852 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2853 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2854 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2855 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2856 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2857 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2858 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2859 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2860 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2861 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2862 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2863 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2864 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2865 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2866 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2867 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2868 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2869 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2870 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2871 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2872 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2873 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2874 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2875 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2876 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2877 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2878 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2879 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2880 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2881 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2882 2883 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2884 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2885 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2886 NUM_BANKS(ADDR_SURF_16_BANK)); 2887 2888 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2889 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2890 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2891 NUM_BANKS(ADDR_SURF_16_BANK)); 2892 2893 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2894 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2895 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2896 NUM_BANKS(ADDR_SURF_16_BANK)); 2897 2898 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2899 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2900 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2901 NUM_BANKS(ADDR_SURF_16_BANK)); 2902 2903 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2904 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2905 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2906 NUM_BANKS(ADDR_SURF_16_BANK)); 2907 2908 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2909 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2910 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2911 NUM_BANKS(ADDR_SURF_16_BANK)); 2912 2913 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2914 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2915 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2916 NUM_BANKS(ADDR_SURF_16_BANK)); 2917 2918 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2919 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2920 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2921 NUM_BANKS(ADDR_SURF_16_BANK)); 2922 2923 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2924 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2925 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2926 NUM_BANKS(ADDR_SURF_16_BANK)); 2927 2928 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2929 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2930 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2931 NUM_BANKS(ADDR_SURF_16_BANK)); 2932 2933 
mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2934 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2935 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2936 NUM_BANKS(ADDR_SURF_16_BANK)); 2937 2938 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2939 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2940 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2941 NUM_BANKS(ADDR_SURF_8_BANK)); 2942 2943 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2944 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2945 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2946 NUM_BANKS(ADDR_SURF_4_BANK)); 2947 2948 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2949 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2950 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2951 NUM_BANKS(ADDR_SURF_4_BANK)); 2952 2953 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2954 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2955 2956 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2957 if (reg_offset != 7) 2958 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2959 2960 break; 2961 case CHIP_STONEY: 2962 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2963 PIPE_CONFIG(ADDR_SURF_P2) | 2964 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2965 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2966 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2967 PIPE_CONFIG(ADDR_SURF_P2) | 2968 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2969 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2970 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2971 PIPE_CONFIG(ADDR_SURF_P2) | 2972 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2973 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2974 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2975 PIPE_CONFIG(ADDR_SURF_P2) | 2976 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2977 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2978 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2979 PIPE_CONFIG(ADDR_SURF_P2) | 2980 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2981 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2982 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2983 PIPE_CONFIG(ADDR_SURF_P2) | 2984 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2985 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2986 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2987 PIPE_CONFIG(ADDR_SURF_P2) | 2988 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2989 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2990 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2991 PIPE_CONFIG(ADDR_SURF_P2)); 2992 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2993 PIPE_CONFIG(ADDR_SURF_P2) | 2994 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2995 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2996 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2997 PIPE_CONFIG(ADDR_SURF_P2) | 2998 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2999 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3000 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3001 PIPE_CONFIG(ADDR_SURF_P2) | 3002 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3003 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3004 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3005 PIPE_CONFIG(ADDR_SURF_P2) | 3006 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3007 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3008 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3009 PIPE_CONFIG(ADDR_SURF_P2) | 3010 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3011 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3012 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3013 PIPE_CONFIG(ADDR_SURF_P2) | 3014 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3015 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3016 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3017 PIPE_CONFIG(ADDR_SURF_P2) | 3018 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3019 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3020 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3021 
PIPE_CONFIG(ADDR_SURF_P2) | 3022 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3023 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3024 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3025 PIPE_CONFIG(ADDR_SURF_P2) | 3026 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3027 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3028 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3029 PIPE_CONFIG(ADDR_SURF_P2) | 3030 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3031 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3032 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3033 PIPE_CONFIG(ADDR_SURF_P2) | 3034 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3035 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3036 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3037 PIPE_CONFIG(ADDR_SURF_P2) | 3038 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3039 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3040 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3041 PIPE_CONFIG(ADDR_SURF_P2) | 3042 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3043 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3044 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3045 PIPE_CONFIG(ADDR_SURF_P2) | 3046 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3047 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3048 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3049 PIPE_CONFIG(ADDR_SURF_P2) | 3050 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3051 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3052 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3053 PIPE_CONFIG(ADDR_SURF_P2) | 3054 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3055 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3056 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3057 PIPE_CONFIG(ADDR_SURF_P2) | 3058 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3059 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3060 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3061 PIPE_CONFIG(ADDR_SURF_P2) | 3062 
MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3063 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3064 3065 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3066 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3067 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3068 NUM_BANKS(ADDR_SURF_8_BANK)); 3069 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3070 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3071 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3072 NUM_BANKS(ADDR_SURF_8_BANK)); 3073 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3074 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3075 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3076 NUM_BANKS(ADDR_SURF_8_BANK)); 3077 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3078 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3079 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3080 NUM_BANKS(ADDR_SURF_8_BANK)); 3081 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3082 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3083 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3084 NUM_BANKS(ADDR_SURF_8_BANK)); 3085 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3086 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3087 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3088 NUM_BANKS(ADDR_SURF_8_BANK)); 3089 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3090 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3091 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3092 NUM_BANKS(ADDR_SURF_8_BANK)); 3093 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3094 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3095 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3096 NUM_BANKS(ADDR_SURF_16_BANK)); 3097 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3098 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3099 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3100 NUM_BANKS(ADDR_SURF_16_BANK)); 3101 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3102 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3103 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3104 NUM_BANKS(ADDR_SURF_16_BANK)); 3105 
mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3106 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3107 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3108 NUM_BANKS(ADDR_SURF_16_BANK)); 3109 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3110 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3111 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3112 NUM_BANKS(ADDR_SURF_16_BANK)); 3113 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3114 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3115 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3116 NUM_BANKS(ADDR_SURF_16_BANK)); 3117 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3118 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3119 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3120 NUM_BANKS(ADDR_SURF_8_BANK)); 3121 3122 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3123 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3124 reg_offset != 23) 3125 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3126 3127 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3128 if (reg_offset != 7) 3129 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3130 3131 break; 3132 default: 3133 dev_warn(adev->dev, 3134 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3135 adev->asic_type); 3136 3137 case CHIP_CARRIZO: 3138 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3139 PIPE_CONFIG(ADDR_SURF_P2) | 3140 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3141 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3142 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3143 PIPE_CONFIG(ADDR_SURF_P2) | 3144 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3145 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3146 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3147 PIPE_CONFIG(ADDR_SURF_P2) | 3148 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3149 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3150 modearray[3] = 
(ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3151 PIPE_CONFIG(ADDR_SURF_P2) | 3152 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3153 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3154 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3155 PIPE_CONFIG(ADDR_SURF_P2) | 3156 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3157 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3158 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3159 PIPE_CONFIG(ADDR_SURF_P2) | 3160 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3161 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3162 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3163 PIPE_CONFIG(ADDR_SURF_P2) | 3164 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3165 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3166 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3167 PIPE_CONFIG(ADDR_SURF_P2)); 3168 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3169 PIPE_CONFIG(ADDR_SURF_P2) | 3170 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3171 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3172 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3173 PIPE_CONFIG(ADDR_SURF_P2) | 3174 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3175 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3176 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3177 PIPE_CONFIG(ADDR_SURF_P2) | 3178 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3179 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3180 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3181 PIPE_CONFIG(ADDR_SURF_P2) | 3182 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3184 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3185 PIPE_CONFIG(ADDR_SURF_P2) | 3186 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3187 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3188 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3189 PIPE_CONFIG(ADDR_SURF_P2) | 3190 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3191 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3192 
modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3193 PIPE_CONFIG(ADDR_SURF_P2) | 3194 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3195 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3196 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3197 PIPE_CONFIG(ADDR_SURF_P2) | 3198 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3199 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3200 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3201 PIPE_CONFIG(ADDR_SURF_P2) | 3202 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3203 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3204 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3205 PIPE_CONFIG(ADDR_SURF_P2) | 3206 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3207 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3208 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3209 PIPE_CONFIG(ADDR_SURF_P2) | 3210 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3211 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3212 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3213 PIPE_CONFIG(ADDR_SURF_P2) | 3214 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3215 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3216 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3217 PIPE_CONFIG(ADDR_SURF_P2) | 3218 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3219 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3220 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3221 PIPE_CONFIG(ADDR_SURF_P2) | 3222 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3223 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3224 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3225 PIPE_CONFIG(ADDR_SURF_P2) | 3226 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3227 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3228 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3229 PIPE_CONFIG(ADDR_SURF_P2) | 3230 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3231 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3232 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3233 
PIPE_CONFIG(ADDR_SURF_P2) | 3234 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3235 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3236 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3237 PIPE_CONFIG(ADDR_SURF_P2) | 3238 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3239 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3240 3241 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3242 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3243 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3244 NUM_BANKS(ADDR_SURF_8_BANK)); 3245 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3246 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3247 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3248 NUM_BANKS(ADDR_SURF_8_BANK)); 3249 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3250 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3251 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3252 NUM_BANKS(ADDR_SURF_8_BANK)); 3253 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3254 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3255 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3256 NUM_BANKS(ADDR_SURF_8_BANK)); 3257 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3258 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3259 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3260 NUM_BANKS(ADDR_SURF_8_BANK)); 3261 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3262 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3263 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3264 NUM_BANKS(ADDR_SURF_8_BANK)); 3265 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3266 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3267 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3268 NUM_BANKS(ADDR_SURF_8_BANK)); 3269 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3270 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3271 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3272 NUM_BANKS(ADDR_SURF_16_BANK)); 3273 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3274 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3275 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3276 
NUM_BANKS(ADDR_SURF_16_BANK)); 3277 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3278 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3279 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3280 NUM_BANKS(ADDR_SURF_16_BANK)); 3281 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3282 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3283 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3284 NUM_BANKS(ADDR_SURF_16_BANK)); 3285 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3286 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3287 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3288 NUM_BANKS(ADDR_SURF_16_BANK)); 3289 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3290 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3291 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3292 NUM_BANKS(ADDR_SURF_16_BANK)); 3293 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3294 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3295 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3296 NUM_BANKS(ADDR_SURF_8_BANK)); 3297 3298 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3299 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3300 reg_offset != 23) 3301 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3302 3303 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3304 if (reg_offset != 7) 3305 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3306 3307 break; 3308 } 3309 } 3310 3311 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) 3312 { 3313 u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3314 3315 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) { 3316 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3317 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3318 } else if (se_num == 0xffffffff) { 3319 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3320 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 
1); 3321 } else if (sh_num == 0xffffffff) { 3322 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3323 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3324 } else { 3325 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3326 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3327 } 3328 WREG32(mmGRBM_GFX_INDEX, data); 3329 } 3330 3331 static u32 gfx_v8_0_create_bitmask(u32 bit_width) 3332 { 3333 return (u32)((1ULL << bit_width) - 1); 3334 } 3335 3336 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3337 { 3338 u32 data, mask; 3339 3340 data = RREG32(mmCC_RB_BACKEND_DISABLE); 3341 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3342 3343 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 3344 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 3345 3346 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / 3347 adev->gfx.config.max_sh_per_se); 3348 3349 return (~data) & mask; 3350 } 3351 3352 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) 3353 { 3354 int i, j; 3355 u32 data; 3356 u32 active_rbs = 0; 3357 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 3358 adev->gfx.config.max_sh_per_se; 3359 3360 mutex_lock(&adev->grbm_idx_mutex); 3361 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3362 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3363 gfx_v8_0_select_se_sh(adev, i, j); 3364 data = gfx_v8_0_get_rb_active_bitmap(adev); 3365 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 3366 rb_bitmap_width_per_sh); 3367 } 3368 } 3369 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); 3370 mutex_unlock(&adev->grbm_idx_mutex); 3371 3372 adev->gfx.config.backend_enable_mask = active_rbs; 3373 adev->gfx.config.num_rbs = hweight32(active_rbs); 3374 } 3375 3376 /** 3377 * gfx_v8_0_init_compute_vmid - gart enable 3378 * 3379 * @rdev: amdgpu_device pointer 3380 * 3381 * Initialize compute vmid sh_mem registers 3382 * 
3383 */ 3384 #define DEFAULT_SH_MEM_BASES (0x6000) 3385 #define FIRST_COMPUTE_VMID (8) 3386 #define LAST_COMPUTE_VMID (16) 3387 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) 3388 { 3389 int i; 3390 uint32_t sh_mem_config; 3391 uint32_t sh_mem_bases; 3392 3393 /* 3394 * Configure apertures: 3395 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 3396 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 3397 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 3398 */ 3399 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 3400 3401 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 << 3402 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT | 3403 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 3404 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | 3405 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | 3406 SH_MEM_CONFIG__PRIVATE_ATC_MASK; 3407 3408 mutex_lock(&adev->srbm_mutex); 3409 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 3410 vi_srbm_select(adev, 0, 0, 0, i); 3411 /* CP and shaders */ 3412 WREG32(mmSH_MEM_CONFIG, sh_mem_config); 3413 WREG32(mmSH_MEM_APE1_BASE, 1); 3414 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3415 WREG32(mmSH_MEM_BASES, sh_mem_bases); 3416 } 3417 vi_srbm_select(adev, 0, 0, 0, 0); 3418 mutex_unlock(&adev->srbm_mutex); 3419 } 3420 3421 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) 3422 { 3423 u32 tmp; 3424 int i; 3425 3426 tmp = RREG32(mmGRBM_CNTL); 3427 tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff); 3428 WREG32(mmGRBM_CNTL, tmp); 3429 3430 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3431 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3432 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); 3433 3434 gfx_v8_0_tiling_mode_table_init(adev); 3435 3436 gfx_v8_0_setup_rb(adev); 3437 gfx_v8_0_get_cu_info(adev); 3438 3439 /* XXX SH_MEM regs */ 3440 /* where to put LDS, scratch, GPUVM in FSA64 space */ 3441 mutex_lock(&adev->srbm_mutex); 3442 for (i = 0; i < 16; i++) { 3443 
vi_srbm_select(adev, 0, 0, 0, i); 3444 /* CP and shaders */ 3445 if (i == 0) { 3446 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC); 3447 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3448 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3449 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3450 WREG32(mmSH_MEM_CONFIG, tmp); 3451 } else { 3452 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC); 3453 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC); 3454 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3455 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3456 WREG32(mmSH_MEM_CONFIG, tmp); 3457 } 3458 3459 WREG32(mmSH_MEM_APE1_BASE, 1); 3460 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3461 WREG32(mmSH_MEM_BASES, 0); 3462 } 3463 vi_srbm_select(adev, 0, 0, 0, 0); 3464 mutex_unlock(&adev->srbm_mutex); 3465 3466 gfx_v8_0_init_compute_vmid(adev); 3467 3468 mutex_lock(&adev->grbm_idx_mutex); 3469 /* 3470 * making sure that the following register writes will be broadcasted 3471 * to all the shaders 3472 */ 3473 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); 3474 3475 WREG32(mmPA_SC_FIFO_SIZE, 3476 (adev->gfx.config.sc_prim_fifo_size_frontend << 3477 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | 3478 (adev->gfx.config.sc_prim_fifo_size_backend << 3479 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | 3480 (adev->gfx.config.sc_hiz_tile_fifo_size << 3481 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | 3482 (adev->gfx.config.sc_earlyz_tile_fifo_size << 3483 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); 3484 mutex_unlock(&adev->grbm_idx_mutex); 3485 3486 } 3487 3488 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 3489 { 3490 u32 i, j, k; 3491 u32 mask; 3492 3493 mutex_lock(&adev->grbm_idx_mutex); 3494 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3495 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3496 gfx_v8_0_select_se_sh(adev, i, j); 3497 for (k = 0; k < adev->usec_timeout; k++) { 
3498 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) 3499 break; 3500 udelay(1); 3501 } 3502 } 3503 } 3504 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); 3505 mutex_unlock(&adev->grbm_idx_mutex); 3506 3507 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 3508 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 3509 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 3510 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 3511 for (k = 0; k < adev->usec_timeout; k++) { 3512 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 3513 break; 3514 udelay(1); 3515 } 3516 } 3517 3518 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 3519 bool enable) 3520 { 3521 u32 tmp = RREG32(mmCP_INT_CNTL_RING0); 3522 3523 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 3524 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 3525 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 3526 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 
1 : 0); 3527 3528 WREG32(mmCP_INT_CNTL_RING0, tmp); 3529 } 3530 3531 static void gfx_v8_0_init_csb(struct amdgpu_device *adev) 3532 { 3533 /* csib */ 3534 WREG32(mmRLC_CSIB_ADDR_HI, 3535 adev->gfx.rlc.clear_state_gpu_addr >> 32); 3536 WREG32(mmRLC_CSIB_ADDR_LO, 3537 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 3538 WREG32(mmRLC_CSIB_LENGTH, 3539 adev->gfx.rlc.clear_state_size); 3540 } 3541 3542 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format, 3543 int ind_offset, 3544 int list_size, 3545 int *unique_indices, 3546 int *indices_count, 3547 int max_indices, 3548 int *ind_start_offsets, 3549 int *offset_count, 3550 int max_offset) 3551 { 3552 int indices; 3553 bool new_entry = true; 3554 3555 for (; ind_offset < list_size; ind_offset++) { 3556 3557 if (new_entry) { 3558 new_entry = false; 3559 ind_start_offsets[*offset_count] = ind_offset; 3560 *offset_count = *offset_count + 1; 3561 BUG_ON(*offset_count >= max_offset); 3562 } 3563 3564 if (register_list_format[ind_offset] == 0xFFFFFFFF) { 3565 new_entry = true; 3566 continue; 3567 } 3568 3569 ind_offset += 2; 3570 3571 /* look for the matching indice */ 3572 for (indices = 0; 3573 indices < *indices_count; 3574 indices++) { 3575 if (unique_indices[indices] == 3576 register_list_format[ind_offset]) 3577 break; 3578 } 3579 3580 if (indices >= *indices_count) { 3581 unique_indices[*indices_count] = 3582 register_list_format[ind_offset]; 3583 indices = *indices_count; 3584 *indices_count = *indices_count + 1; 3585 BUG_ON(*indices_count >= max_indices); 3586 } 3587 3588 register_list_format[ind_offset] = indices; 3589 } 3590 } 3591 3592 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) 3593 { 3594 int i, temp, data; 3595 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0}; 3596 int indices_count = 0; 3597 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 3598 int offset_count = 0; 3599 3600 int list_size; 3601 unsigned int *register_list_format = 3602 
kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 3603 if (register_list_format == NULL) 3604 return -ENOMEM; 3605 memcpy(register_list_format, adev->gfx.rlc.register_list_format, 3606 adev->gfx.rlc.reg_list_format_size_bytes); 3607 3608 gfx_v8_0_parse_ind_reg_list(register_list_format, 3609 RLC_FormatDirectRegListLength, 3610 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 3611 unique_indices, 3612 &indices_count, 3613 sizeof(unique_indices) / sizeof(int), 3614 indirect_start_offsets, 3615 &offset_count, 3616 sizeof(indirect_start_offsets)/sizeof(int)); 3617 3618 /* save and restore list */ 3619 temp = RREG32(mmRLC_SRM_CNTL); 3620 temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 3621 WREG32(mmRLC_SRM_CNTL, temp); 3622 3623 WREG32(mmRLC_SRM_ARAM_ADDR, 0); 3624 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 3625 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]); 3626 3627 /* indirect list */ 3628 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start); 3629 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) 3630 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]); 3631 3632 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 3633 list_size = list_size >> 1; 3634 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size); 3635 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size); 3636 3637 /* starting offsets starts */ 3638 WREG32(mmRLC_GPM_SCRATCH_ADDR, 3639 adev->gfx.rlc.starting_offsets_start); 3640 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) 3641 WREG32(mmRLC_GPM_SCRATCH_DATA, 3642 indirect_start_offsets[i]); 3643 3644 /* unique indices */ 3645 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; 3646 data = mmRLC_SRM_INDEX_CNTL_DATA_0; 3647 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { 3648 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false); 3649 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false); 3650 } 3651 kfree(register_list_format); 3652 3653 
return 0; 3654 } 3655 3656 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev) 3657 { 3658 uint32_t data; 3659 3660 data = RREG32(mmRLC_SRM_CNTL); 3661 data |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 3662 WREG32(mmRLC_SRM_CNTL, data); 3663 } 3664 3665 static void polaris11_init_power_gating(struct amdgpu_device *adev) 3666 { 3667 uint32_t data; 3668 3669 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 3670 AMD_PG_SUPPORT_GFX_SMG | 3671 AMD_PG_SUPPORT_GFX_DMG)) { 3672 data = RREG32(mmCP_RB_WPTR_POLL_CNTL); 3673 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 3674 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 3675 WREG32(mmCP_RB_WPTR_POLL_CNTL, data); 3676 3677 data = 0; 3678 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 3679 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 3680 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 3681 data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 3682 WREG32(mmRLC_PG_DELAY, data); 3683 3684 data = RREG32(mmRLC_PG_DELAY_2); 3685 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 3686 data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 3687 WREG32(mmRLC_PG_DELAY_2, data); 3688 3689 data = RREG32(mmRLC_AUTO_PG_CTRL); 3690 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 3691 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 3692 WREG32(mmRLC_AUTO_PG_CTRL, data); 3693 } 3694 } 3695 3696 static void gfx_v8_0_init_pg(struct amdgpu_device *adev) 3697 { 3698 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 3699 AMD_PG_SUPPORT_GFX_SMG | 3700 AMD_PG_SUPPORT_GFX_DMG | 3701 AMD_PG_SUPPORT_CP | 3702 AMD_PG_SUPPORT_GDS | 3703 AMD_PG_SUPPORT_RLC_SMU_HS)) { 3704 gfx_v8_0_init_csb(adev); 3705 gfx_v8_0_init_save_restore_list(adev); 3706 gfx_v8_0_enable_save_restore_machine(adev); 3707 3708 if (adev->asic_type == CHIP_POLARIS11) 3709 polaris11_init_power_gating(adev); 3710 } 3711 } 3712 3713 void gfx_v8_0_rlc_stop(struct amdgpu_device 
*adev) 3714 { 3715 u32 tmp = RREG32(mmRLC_CNTL); 3716 3717 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 3718 WREG32(mmRLC_CNTL, tmp); 3719 3720 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 3721 3722 gfx_v8_0_wait_for_rlc_serdes(adev); 3723 } 3724 3725 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev) 3726 { 3727 u32 tmp = RREG32(mmGRBM_SOFT_RESET); 3728 3729 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3730 WREG32(mmGRBM_SOFT_RESET, tmp); 3731 udelay(50); 3732 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 3733 WREG32(mmGRBM_SOFT_RESET, tmp); 3734 udelay(50); 3735 } 3736 3737 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev) 3738 { 3739 u32 tmp = RREG32(mmRLC_CNTL); 3740 3741 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1); 3742 WREG32(mmRLC_CNTL, tmp); 3743 3744 /* carrizo do enable cp interrupt after cp inited */ 3745 if (!(adev->flags & AMD_IS_APU)) 3746 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 3747 3748 udelay(50); 3749 } 3750 3751 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev) 3752 { 3753 const struct rlc_firmware_header_v2_0 *hdr; 3754 const __le32 *fw_data; 3755 unsigned i, fw_size; 3756 3757 if (!adev->gfx.rlc_fw) 3758 return -EINVAL; 3759 3760 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 3761 amdgpu_ucode_print_rlc_hdr(&hdr->header); 3762 3763 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 3764 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 3765 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 3766 3767 WREG32(mmRLC_GPM_UCODE_ADDR, 0); 3768 for (i = 0; i < fw_size; i++) 3769 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 3770 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 3771 3772 return 0; 3773 } 3774 3775 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) 3776 { 3777 int r; 3778 3779 gfx_v8_0_rlc_stop(adev); 3780 3781 /* disable CG */ 3782 WREG32(mmRLC_CGCG_CGLS_CTRL, 0); 3783 if 
(adev->asic_type == CHIP_POLARIS11 || 3784 adev->asic_type == CHIP_POLARIS10) 3785 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0); 3786 3787 /* disable PG */ 3788 WREG32(mmRLC_PG_CNTL, 0); 3789 3790 gfx_v8_0_rlc_reset(adev); 3791 3792 gfx_v8_0_init_pg(adev); 3793 3794 if (!adev->pp_enabled) { 3795 if (!adev->firmware.smu_load) { 3796 /* legacy rlc firmware loading */ 3797 r = gfx_v8_0_rlc_load_microcode(adev); 3798 if (r) 3799 return r; 3800 } else { 3801 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 3802 AMDGPU_UCODE_ID_RLC_G); 3803 if (r) 3804 return -EINVAL; 3805 } 3806 } 3807 3808 gfx_v8_0_rlc_start(adev); 3809 3810 return 0; 3811 } 3812 3813 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3814 { 3815 int i; 3816 u32 tmp = RREG32(mmCP_ME_CNTL); 3817 3818 if (enable) { 3819 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0); 3820 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0); 3821 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0); 3822 } else { 3823 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); 3824 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); 3825 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); 3826 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 3827 adev->gfx.gfx_ring[i].ready = false; 3828 } 3829 WREG32(mmCP_ME_CNTL, tmp); 3830 udelay(50); 3831 } 3832 3833 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3834 { 3835 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3836 const struct gfx_firmware_header_v1_0 *ce_hdr; 3837 const struct gfx_firmware_header_v1_0 *me_hdr; 3838 const __le32 *fw_data; 3839 unsigned i, fw_size; 3840 3841 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3842 return -EINVAL; 3843 3844 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3845 adev->gfx.pfp_fw->data; 3846 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3847 adev->gfx.ce_fw->data; 3848 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3849 adev->gfx.me_fw->data; 3850 3851 
amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3852 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3853 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3854 3855 gfx_v8_0_cp_gfx_enable(adev, false); 3856 3857 /* PFP */ 3858 fw_data = (const __le32 *) 3859 (adev->gfx.pfp_fw->data + 3860 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3861 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3862 WREG32(mmCP_PFP_UCODE_ADDR, 0); 3863 for (i = 0; i < fw_size; i++) 3864 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3865 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3866 3867 /* CE */ 3868 fw_data = (const __le32 *) 3869 (adev->gfx.ce_fw->data + 3870 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3871 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3872 WREG32(mmCP_CE_UCODE_ADDR, 0); 3873 for (i = 0; i < fw_size; i++) 3874 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3875 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3876 3877 /* ME */ 3878 fw_data = (const __le32 *) 3879 (adev->gfx.me_fw->data + 3880 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3881 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3882 WREG32(mmCP_ME_RAM_WADDR, 0); 3883 for (i = 0; i < fw_size; i++) 3884 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3885 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3886 3887 return 0; 3888 } 3889 3890 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev) 3891 { 3892 u32 count = 0; 3893 const struct cs_section_def *sect = NULL; 3894 const struct cs_extent_def *ext = NULL; 3895 3896 /* begin clear state */ 3897 count += 2; 3898 /* context control state */ 3899 count += 3; 3900 3901 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 3902 for (ext = sect->section; ext->extent != NULL; ++ext) { 3903 if (sect->id == SECT_CONTEXT) 3904 count += 2 + ext->reg_count; 3905 else 3906 return 0; 3907 } 3908 } 3909 /* pa_sc_raster_config/pa_sc_raster_config1 */ 3910 count += 4; 
3911 /* end clear state */ 3912 count += 2; 3913 /* clear state */ 3914 count += 2; 3915 3916 return count; 3917 } 3918 3919 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) 3920 { 3921 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3922 const struct cs_section_def *sect = NULL; 3923 const struct cs_extent_def *ext = NULL; 3924 int r, i; 3925 3926 /* init the CP */ 3927 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3928 WREG32(mmCP_ENDIAN_SWAP, 0); 3929 WREG32(mmCP_DEVICE_ID, 1); 3930 3931 gfx_v8_0_cp_gfx_enable(adev, true); 3932 3933 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); 3934 if (r) { 3935 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3936 return r; 3937 } 3938 3939 /* clear state buffer */ 3940 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3941 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3942 3943 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3944 amdgpu_ring_write(ring, 0x80000000); 3945 amdgpu_ring_write(ring, 0x80000000); 3946 3947 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 3948 for (ext = sect->section; ext->extent != NULL; ++ext) { 3949 if (sect->id == SECT_CONTEXT) { 3950 amdgpu_ring_write(ring, 3951 PACKET3(PACKET3_SET_CONTEXT_REG, 3952 ext->reg_count)); 3953 amdgpu_ring_write(ring, 3954 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 3955 for (i = 0; i < ext->reg_count; i++) 3956 amdgpu_ring_write(ring, ext->extent[i]); 3957 } 3958 } 3959 } 3960 3961 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 3962 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 3963 switch (adev->asic_type) { 3964 case CHIP_TONGA: 3965 case CHIP_POLARIS10: 3966 amdgpu_ring_write(ring, 0x16000012); 3967 amdgpu_ring_write(ring, 0x0000002A); 3968 break; 3969 case CHIP_POLARIS11: 3970 amdgpu_ring_write(ring, 0x16000012); 3971 amdgpu_ring_write(ring, 0x00000000); 3972 break; 3973 case CHIP_FIJI: 3974 
amdgpu_ring_write(ring, 0x3a00161a); 3975 amdgpu_ring_write(ring, 0x0000002e); 3976 break; 3977 case CHIP_TOPAZ: 3978 case CHIP_CARRIZO: 3979 amdgpu_ring_write(ring, 0x00000002); 3980 amdgpu_ring_write(ring, 0x00000000); 3981 break; 3982 case CHIP_STONEY: 3983 amdgpu_ring_write(ring, 0x00000000); 3984 amdgpu_ring_write(ring, 0x00000000); 3985 break; 3986 default: 3987 BUG(); 3988 } 3989 3990 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3991 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3992 3993 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3994 amdgpu_ring_write(ring, 0); 3995 3996 /* init the CE partitions */ 3997 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3998 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3999 amdgpu_ring_write(ring, 0x8000); 4000 amdgpu_ring_write(ring, 0x8000); 4001 4002 amdgpu_ring_commit(ring); 4003 4004 return 0; 4005 } 4006 4007 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) 4008 { 4009 struct amdgpu_ring *ring; 4010 u32 tmp; 4011 u32 rb_bufsz; 4012 u64 rb_addr, rptr_addr; 4013 int r; 4014 4015 /* Set the write pointer delay */ 4016 WREG32(mmCP_RB_WPTR_DELAY, 0); 4017 4018 /* set the RB to use vmid 0 */ 4019 WREG32(mmCP_RB_VMID, 0); 4020 4021 /* Set ring buffer size */ 4022 ring = &adev->gfx.gfx_ring[0]; 4023 rb_bufsz = order_base_2(ring->ring_size / 8); 4024 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 4025 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 4026 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3); 4027 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1); 4028 #ifdef __BIG_ENDIAN 4029 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 4030 #endif 4031 WREG32(mmCP_RB0_CNTL, tmp); 4032 4033 /* Initialize the ring buffer's read and write pointers */ 4034 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); 4035 ring->wptr = 0; 4036 WREG32(mmCP_RB0_WPTR, ring->wptr); 4037 4038 /* set the wb address wether 
it's enabled or not */ 4039 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4040 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 4041 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); 4042 4043 mdelay(1); 4044 WREG32(mmCP_RB0_CNTL, tmp); 4045 4046 rb_addr = ring->gpu_addr >> 8; 4047 WREG32(mmCP_RB0_BASE, rb_addr); 4048 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 4049 4050 /* no gfx doorbells on iceland */ 4051 if (adev->asic_type != CHIP_TOPAZ) { 4052 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); 4053 if (ring->use_doorbell) { 4054 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4055 DOORBELL_OFFSET, ring->doorbell_index); 4056 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4057 DOORBELL_HIT, 0); 4058 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4059 DOORBELL_EN, 1); 4060 } else { 4061 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4062 DOORBELL_EN, 0); 4063 } 4064 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); 4065 4066 if (adev->asic_type == CHIP_TONGA) { 4067 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 4068 DOORBELL_RANGE_LOWER, 4069 AMDGPU_DOORBELL_GFX_RING0); 4070 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 4071 4072 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, 4073 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 4074 } 4075 4076 } 4077 4078 /* start the ring */ 4079 gfx_v8_0_cp_gfx_start(adev); 4080 ring->ready = true; 4081 r = amdgpu_ring_test_ring(ring); 4082 if (r) { 4083 ring->ready = false; 4084 return r; 4085 } 4086 4087 return 0; 4088 } 4089 4090 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 4091 { 4092 int i; 4093 4094 if (enable) { 4095 WREG32(mmCP_MEC_CNTL, 0); 4096 } else { 4097 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 4098 for (i = 0; i < adev->gfx.num_compute_rings; i++) 4099 adev->gfx.compute_ring[i].ready = false; 4100 } 4101 udelay(50); 4102 } 4103 4104 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 
4105 { 4106 const struct gfx_firmware_header_v1_0 *mec_hdr; 4107 const __le32 *fw_data; 4108 unsigned i, fw_size; 4109 4110 if (!adev->gfx.mec_fw) 4111 return -EINVAL; 4112 4113 gfx_v8_0_cp_compute_enable(adev, false); 4114 4115 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4116 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4117 4118 fw_data = (const __le32 *) 4119 (adev->gfx.mec_fw->data + 4120 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4121 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4122 4123 /* MEC1 */ 4124 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4125 for (i = 0; i < fw_size; i++) 4126 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4127 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4128 4129 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 4130 if (adev->gfx.mec2_fw) { 4131 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4132 4133 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4134 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4135 4136 fw_data = (const __le32 *) 4137 (adev->gfx.mec2_fw->data + 4138 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4139 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4140 4141 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4142 for (i = 0; i < fw_size; i++) 4143 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4144 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4145 } 4146 4147 return 0; 4148 } 4149 4150 struct vi_mqd { 4151 uint32_t header; /* ordinal0 */ 4152 uint32_t compute_dispatch_initiator; /* ordinal1 */ 4153 uint32_t compute_dim_x; /* ordinal2 */ 4154 uint32_t compute_dim_y; /* ordinal3 */ 4155 uint32_t compute_dim_z; /* ordinal4 */ 4156 uint32_t compute_start_x; /* ordinal5 */ 4157 uint32_t compute_start_y; /* ordinal6 */ 4158 uint32_t compute_start_z; /* ordinal7 */ 4159 uint32_t compute_num_thread_x; /* ordinal8 */ 4160 uint32_t 
compute_num_thread_y; /* ordinal9 */ 4161 uint32_t compute_num_thread_z; /* ordinal10 */ 4162 uint32_t compute_pipelinestat_enable; /* ordinal11 */ 4163 uint32_t compute_perfcount_enable; /* ordinal12 */ 4164 uint32_t compute_pgm_lo; /* ordinal13 */ 4165 uint32_t compute_pgm_hi; /* ordinal14 */ 4166 uint32_t compute_tba_lo; /* ordinal15 */ 4167 uint32_t compute_tba_hi; /* ordinal16 */ 4168 uint32_t compute_tma_lo; /* ordinal17 */ 4169 uint32_t compute_tma_hi; /* ordinal18 */ 4170 uint32_t compute_pgm_rsrc1; /* ordinal19 */ 4171 uint32_t compute_pgm_rsrc2; /* ordinal20 */ 4172 uint32_t compute_vmid; /* ordinal21 */ 4173 uint32_t compute_resource_limits; /* ordinal22 */ 4174 uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */ 4175 uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */ 4176 uint32_t compute_tmpring_size; /* ordinal25 */ 4177 uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */ 4178 uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */ 4179 uint32_t compute_restart_x; /* ordinal28 */ 4180 uint32_t compute_restart_y; /* ordinal29 */ 4181 uint32_t compute_restart_z; /* ordinal30 */ 4182 uint32_t compute_thread_trace_enable; /* ordinal31 */ 4183 uint32_t compute_misc_reserved; /* ordinal32 */ 4184 uint32_t compute_dispatch_id; /* ordinal33 */ 4185 uint32_t compute_threadgroup_id; /* ordinal34 */ 4186 uint32_t compute_relaunch; /* ordinal35 */ 4187 uint32_t compute_wave_restore_addr_lo; /* ordinal36 */ 4188 uint32_t compute_wave_restore_addr_hi; /* ordinal37 */ 4189 uint32_t compute_wave_restore_control; /* ordinal38 */ 4190 uint32_t reserved9; /* ordinal39 */ 4191 uint32_t reserved10; /* ordinal40 */ 4192 uint32_t reserved11; /* ordinal41 */ 4193 uint32_t reserved12; /* ordinal42 */ 4194 uint32_t reserved13; /* ordinal43 */ 4195 uint32_t reserved14; /* ordinal44 */ 4196 uint32_t reserved15; /* ordinal45 */ 4197 uint32_t reserved16; /* ordinal46 */ 4198 uint32_t reserved17; /* ordinal47 */ 4199 uint32_t reserved18; /* ordinal48 */ 4200 
uint32_t reserved19; /* ordinal49 */ 4201 uint32_t reserved20; /* ordinal50 */ 4202 uint32_t reserved21; /* ordinal51 */ 4203 uint32_t reserved22; /* ordinal52 */ 4204 uint32_t reserved23; /* ordinal53 */ 4205 uint32_t reserved24; /* ordinal54 */ 4206 uint32_t reserved25; /* ordinal55 */ 4207 uint32_t reserved26; /* ordinal56 */ 4208 uint32_t reserved27; /* ordinal57 */ 4209 uint32_t reserved28; /* ordinal58 */ 4210 uint32_t reserved29; /* ordinal59 */ 4211 uint32_t reserved30; /* ordinal60 */ 4212 uint32_t reserved31; /* ordinal61 */ 4213 uint32_t reserved32; /* ordinal62 */ 4214 uint32_t reserved33; /* ordinal63 */ 4215 uint32_t reserved34; /* ordinal64 */ 4216 uint32_t compute_user_data_0; /* ordinal65 */ 4217 uint32_t compute_user_data_1; /* ordinal66 */ 4218 uint32_t compute_user_data_2; /* ordinal67 */ 4219 uint32_t compute_user_data_3; /* ordinal68 */ 4220 uint32_t compute_user_data_4; /* ordinal69 */ 4221 uint32_t compute_user_data_5; /* ordinal70 */ 4222 uint32_t compute_user_data_6; /* ordinal71 */ 4223 uint32_t compute_user_data_7; /* ordinal72 */ 4224 uint32_t compute_user_data_8; /* ordinal73 */ 4225 uint32_t compute_user_data_9; /* ordinal74 */ 4226 uint32_t compute_user_data_10; /* ordinal75 */ 4227 uint32_t compute_user_data_11; /* ordinal76 */ 4228 uint32_t compute_user_data_12; /* ordinal77 */ 4229 uint32_t compute_user_data_13; /* ordinal78 */ 4230 uint32_t compute_user_data_14; /* ordinal79 */ 4231 uint32_t compute_user_data_15; /* ordinal80 */ 4232 uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */ 4233 uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */ 4234 uint32_t reserved35; /* ordinal83 */ 4235 uint32_t reserved36; /* ordinal84 */ 4236 uint32_t reserved37; /* ordinal85 */ 4237 uint32_t cp_mqd_query_time_lo; /* ordinal86 */ 4238 uint32_t cp_mqd_query_time_hi; /* ordinal87 */ 4239 uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */ 4240 uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */ 4241 uint32_t cp_mqd_connect_end_time_lo; 
/* ordinal90 */ 4242 uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */ 4243 uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */ 4244 uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */ 4245 uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */ 4246 uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */ 4247 uint32_t reserved38; /* ordinal96 */ 4248 uint32_t reserved39; /* ordinal97 */ 4249 uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */ 4250 uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */ 4251 uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */ 4252 uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */ 4253 uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */ 4254 uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */ 4255 uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */ 4256 uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */ 4257 uint32_t reserved40; /* ordinal106 */ 4258 uint32_t reserved41; /* ordinal107 */ 4259 uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */ 4260 uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */ 4261 uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */ 4262 uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */ 4263 uint32_t reserved42; /* ordinal112 */ 4264 uint32_t reserved43; /* ordinal113 */ 4265 uint32_t cp_pq_exe_status_lo; /* ordinal114 */ 4266 uint32_t cp_pq_exe_status_hi; /* ordinal115 */ 4267 uint32_t cp_packet_id_lo; /* ordinal116 */ 4268 uint32_t cp_packet_id_hi; /* ordinal117 */ 4269 uint32_t cp_packet_exe_status_lo; /* ordinal118 */ 4270 uint32_t cp_packet_exe_status_hi; /* ordinal119 */ 4271 uint32_t gds_save_base_addr_lo; /* ordinal120 */ 4272 uint32_t gds_save_base_addr_hi; /* ordinal121 */ 4273 uint32_t gds_save_mask_lo; /* ordinal122 */ 4274 uint32_t gds_save_mask_hi; /* ordinal123 */ 4275 uint32_t ctx_save_base_addr_lo; /* ordinal124 */ 4276 uint32_t ctx_save_base_addr_hi; /* ordinal125 */ 4277 uint32_t reserved44; /* ordinal126 */ 4278 uint32_t reserved45; /* ordinal127 */ 4279 uint32_t cp_mqd_base_addr_lo; /* ordinal128 */ 4280 
uint32_t cp_mqd_base_addr_hi; /* ordinal129 */ 4281 uint32_t cp_hqd_active; /* ordinal130 */ 4282 uint32_t cp_hqd_vmid; /* ordinal131 */ 4283 uint32_t cp_hqd_persistent_state; /* ordinal132 */ 4284 uint32_t cp_hqd_pipe_priority; /* ordinal133 */ 4285 uint32_t cp_hqd_queue_priority; /* ordinal134 */ 4286 uint32_t cp_hqd_quantum; /* ordinal135 */ 4287 uint32_t cp_hqd_pq_base_lo; /* ordinal136 */ 4288 uint32_t cp_hqd_pq_base_hi; /* ordinal137 */ 4289 uint32_t cp_hqd_pq_rptr; /* ordinal138 */ 4290 uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */ 4291 uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */ 4292 uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */ 4293 uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */ 4294 uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */ 4295 uint32_t cp_hqd_pq_wptr; /* ordinal144 */ 4296 uint32_t cp_hqd_pq_control; /* ordinal145 */ 4297 uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */ 4298 uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */ 4299 uint32_t cp_hqd_ib_rptr; /* ordinal148 */ 4300 uint32_t cp_hqd_ib_control; /* ordinal149 */ 4301 uint32_t cp_hqd_iq_timer; /* ordinal150 */ 4302 uint32_t cp_hqd_iq_rptr; /* ordinal151 */ 4303 uint32_t cp_hqd_dequeue_request; /* ordinal152 */ 4304 uint32_t cp_hqd_dma_offload; /* ordinal153 */ 4305 uint32_t cp_hqd_sema_cmd; /* ordinal154 */ 4306 uint32_t cp_hqd_msg_type; /* ordinal155 */ 4307 uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */ 4308 uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */ 4309 uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */ 4310 uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */ 4311 uint32_t cp_hqd_hq_status0; /* ordinal160 */ 4312 uint32_t cp_hqd_hq_control0; /* ordinal161 */ 4313 uint32_t cp_mqd_control; /* ordinal162 */ 4314 uint32_t cp_hqd_hq_status1; /* ordinal163 */ 4315 uint32_t cp_hqd_hq_control1; /* ordinal164 */ 4316 uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */ 4317 uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */ 4318 uint32_t 
cp_hqd_eop_control; /* ordinal167 */ 4319 uint32_t cp_hqd_eop_rptr; /* ordinal168 */ 4320 uint32_t cp_hqd_eop_wptr; /* ordinal169 */ 4321 uint32_t cp_hqd_eop_done_events; /* ordinal170 */ 4322 uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */ 4323 uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */ 4324 uint32_t cp_hqd_ctx_save_control; /* ordinal173 */ 4325 uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */ 4326 uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */ 4327 uint32_t cp_hqd_wg_state_offset; /* ordinal176 */ 4328 uint32_t cp_hqd_ctx_save_size; /* ordinal177 */ 4329 uint32_t cp_hqd_gds_resource_state; /* ordinal178 */ 4330 uint32_t cp_hqd_error; /* ordinal179 */ 4331 uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */ 4332 uint32_t cp_hqd_eop_dones; /* ordinal181 */ 4333 uint32_t reserved46; /* ordinal182 */ 4334 uint32_t reserved47; /* ordinal183 */ 4335 uint32_t reserved48; /* ordinal184 */ 4336 uint32_t reserved49; /* ordinal185 */ 4337 uint32_t reserved50; /* ordinal186 */ 4338 uint32_t reserved51; /* ordinal187 */ 4339 uint32_t reserved52; /* ordinal188 */ 4340 uint32_t reserved53; /* ordinal189 */ 4341 uint32_t reserved54; /* ordinal190 */ 4342 uint32_t reserved55; /* ordinal191 */ 4343 uint32_t iqtimer_pkt_header; /* ordinal192 */ 4344 uint32_t iqtimer_pkt_dw0; /* ordinal193 */ 4345 uint32_t iqtimer_pkt_dw1; /* ordinal194 */ 4346 uint32_t iqtimer_pkt_dw2; /* ordinal195 */ 4347 uint32_t iqtimer_pkt_dw3; /* ordinal196 */ 4348 uint32_t iqtimer_pkt_dw4; /* ordinal197 */ 4349 uint32_t iqtimer_pkt_dw5; /* ordinal198 */ 4350 uint32_t iqtimer_pkt_dw6; /* ordinal199 */ 4351 uint32_t iqtimer_pkt_dw7; /* ordinal200 */ 4352 uint32_t iqtimer_pkt_dw8; /* ordinal201 */ 4353 uint32_t iqtimer_pkt_dw9; /* ordinal202 */ 4354 uint32_t iqtimer_pkt_dw10; /* ordinal203 */ 4355 uint32_t iqtimer_pkt_dw11; /* ordinal204 */ 4356 uint32_t iqtimer_pkt_dw12; /* ordinal205 */ 4357 uint32_t iqtimer_pkt_dw13; /* ordinal206 */ 4358 uint32_t iqtimer_pkt_dw14; /* ordinal207 */ 
4359 uint32_t iqtimer_pkt_dw15; /* ordinal208 */ 4360 uint32_t iqtimer_pkt_dw16; /* ordinal209 */ 4361 uint32_t iqtimer_pkt_dw17; /* ordinal210 */ 4362 uint32_t iqtimer_pkt_dw18; /* ordinal211 */ 4363 uint32_t iqtimer_pkt_dw19; /* ordinal212 */ 4364 uint32_t iqtimer_pkt_dw20; /* ordinal213 */ 4365 uint32_t iqtimer_pkt_dw21; /* ordinal214 */ 4366 uint32_t iqtimer_pkt_dw22; /* ordinal215 */ 4367 uint32_t iqtimer_pkt_dw23; /* ordinal216 */ 4368 uint32_t iqtimer_pkt_dw24; /* ordinal217 */ 4369 uint32_t iqtimer_pkt_dw25; /* ordinal218 */ 4370 uint32_t iqtimer_pkt_dw26; /* ordinal219 */ 4371 uint32_t iqtimer_pkt_dw27; /* ordinal220 */ 4372 uint32_t iqtimer_pkt_dw28; /* ordinal221 */ 4373 uint32_t iqtimer_pkt_dw29; /* ordinal222 */ 4374 uint32_t iqtimer_pkt_dw30; /* ordinal223 */ 4375 uint32_t iqtimer_pkt_dw31; /* ordinal224 */ 4376 uint32_t reserved56; /* ordinal225 */ 4377 uint32_t reserved57; /* ordinal226 */ 4378 uint32_t reserved58; /* ordinal227 */ 4379 uint32_t set_resources_header; /* ordinal228 */ 4380 uint32_t set_resources_dw1; /* ordinal229 */ 4381 uint32_t set_resources_dw2; /* ordinal230 */ 4382 uint32_t set_resources_dw3; /* ordinal231 */ 4383 uint32_t set_resources_dw4; /* ordinal232 */ 4384 uint32_t set_resources_dw5; /* ordinal233 */ 4385 uint32_t set_resources_dw6; /* ordinal234 */ 4386 uint32_t set_resources_dw7; /* ordinal235 */ 4387 uint32_t reserved59; /* ordinal236 */ 4388 uint32_t reserved60; /* ordinal237 */ 4389 uint32_t reserved61; /* ordinal238 */ 4390 uint32_t reserved62; /* ordinal239 */ 4391 uint32_t reserved63; /* ordinal240 */ 4392 uint32_t reserved64; /* ordinal241 */ 4393 uint32_t reserved65; /* ordinal242 */ 4394 uint32_t reserved66; /* ordinal243 */ 4395 uint32_t reserved67; /* ordinal244 */ 4396 uint32_t reserved68; /* ordinal245 */ 4397 uint32_t reserved69; /* ordinal246 */ 4398 uint32_t reserved70; /* ordinal247 */ 4399 uint32_t reserved71; /* ordinal248 */ 4400 uint32_t reserved72; /* ordinal249 */ 4401 uint32_t reserved73; /* 
ordinal250 */ 4402 uint32_t reserved74; /* ordinal251 */ 4403 uint32_t reserved75; /* ordinal252 */ 4404 uint32_t reserved76; /* ordinal253 */ 4405 uint32_t reserved77; /* ordinal254 */ 4406 uint32_t reserved78; /* ordinal255 */ 4407 4408 uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */ 4409 }; 4410 4411 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev) 4412 { 4413 int i, r; 4414 4415 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4416 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4417 4418 if (ring->mqd_obj) { 4419 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4420 if (unlikely(r != 0)) 4421 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); 4422 4423 amdgpu_bo_unpin(ring->mqd_obj); 4424 amdgpu_bo_unreserve(ring->mqd_obj); 4425 4426 amdgpu_bo_unref(&ring->mqd_obj); 4427 ring->mqd_obj = NULL; 4428 } 4429 } 4430 } 4431 4432 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) 4433 { 4434 int r, i, j; 4435 u32 tmp; 4436 bool use_doorbell = true; 4437 u64 hqd_gpu_addr; 4438 u64 mqd_gpu_addr; 4439 u64 eop_gpu_addr; 4440 u64 wb_gpu_addr; 4441 u32 *buf; 4442 struct vi_mqd *mqd; 4443 4444 /* init the pipes */ 4445 mutex_lock(&adev->srbm_mutex); 4446 for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) { 4447 int me = (i < 4) ? 1 : 2; 4448 int pipe = (i < 4) ? 
i : (i - 4); 4449 4450 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); 4451 eop_gpu_addr >>= 8; 4452 4453 vi_srbm_select(adev, me, pipe, 0, 0); 4454 4455 /* write the EOP addr */ 4456 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr); 4457 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); 4458 4459 /* set the VMID assigned */ 4460 WREG32(mmCP_HQD_VMID, 0); 4461 4462 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4463 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4464 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4465 (order_base_2(MEC_HPD_SIZE / 4) - 1)); 4466 WREG32(mmCP_HQD_EOP_CONTROL, tmp); 4467 } 4468 vi_srbm_select(adev, 0, 0, 0, 0); 4469 mutex_unlock(&adev->srbm_mutex); 4470 4471 /* init the queues. Just two for now. */ 4472 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4473 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4474 4475 if (ring->mqd_obj == NULL) { 4476 r = amdgpu_bo_create(adev, 4477 sizeof(struct vi_mqd), 4478 PAGE_SIZE, true, 4479 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, 4480 NULL, &ring->mqd_obj); 4481 if (r) { 4482 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); 4483 return r; 4484 } 4485 } 4486 4487 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4488 if (unlikely(r != 0)) { 4489 gfx_v8_0_cp_compute_fini(adev); 4490 return r; 4491 } 4492 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, 4493 &mqd_gpu_addr); 4494 if (r) { 4495 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); 4496 gfx_v8_0_cp_compute_fini(adev); 4497 return r; 4498 } 4499 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); 4500 if (r) { 4501 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); 4502 gfx_v8_0_cp_compute_fini(adev); 4503 return r; 4504 } 4505 4506 /* init the mqd struct */ 4507 memset(buf, 0, sizeof(struct vi_mqd)); 4508 4509 mqd = (struct vi_mqd *)buf; 4510 mqd->header = 0xC0310800; 4511 mqd->compute_pipelinestat_enable = 0x00000001; 4512 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4513 
mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4514 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4515 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4516 mqd->compute_misc_reserved = 0x00000003; 4517 4518 mutex_lock(&adev->srbm_mutex); 4519 vi_srbm_select(adev, ring->me, 4520 ring->pipe, 4521 ring->queue, 0); 4522 4523 /* disable wptr polling */ 4524 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); 4525 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4526 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); 4527 4528 mqd->cp_hqd_eop_base_addr_lo = 4529 RREG32(mmCP_HQD_EOP_BASE_ADDR); 4530 mqd->cp_hqd_eop_base_addr_hi = 4531 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI); 4532 4533 /* enable doorbell? */ 4534 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4535 if (use_doorbell) { 4536 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 4537 } else { 4538 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); 4539 } 4540 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); 4541 mqd->cp_hqd_pq_doorbell_control = tmp; 4542 4543 /* disable the queue if it's active */ 4544 mqd->cp_hqd_dequeue_request = 0; 4545 mqd->cp_hqd_pq_rptr = 0; 4546 mqd->cp_hqd_pq_wptr= 0; 4547 if (RREG32(mmCP_HQD_ACTIVE) & 1) { 4548 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); 4549 for (j = 0; j < adev->usec_timeout; j++) { 4550 if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) 4551 break; 4552 udelay(1); 4553 } 4554 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); 4555 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); 4556 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 4557 } 4558 4559 /* set the pointer to the MQD */ 4560 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; 4561 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); 4562 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); 4563 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 4564 4565 /* set MQD vmid to 0 */ 4566 tmp = RREG32(mmCP_MQD_CONTROL); 4567 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4568 WREG32(mmCP_MQD_CONTROL, 
tmp); 4569 mqd->cp_mqd_control = tmp; 4570 4571 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4572 hqd_gpu_addr = ring->gpu_addr >> 8; 4573 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4574 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4575 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); 4576 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); 4577 4578 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4579 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4580 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4581 (order_base_2(ring->ring_size / 4) - 1)); 4582 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4583 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4584 #ifdef __BIG_ENDIAN 4585 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4586 #endif 4587 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4588 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4589 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4590 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4591 WREG32(mmCP_HQD_PQ_CONTROL, tmp); 4592 mqd->cp_hqd_pq_control = tmp; 4593 4594 /* set the wb address wether it's enabled or not */ 4595 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4596 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4597 mqd->cp_hqd_pq_rptr_report_addr_hi = 4598 upper_32_bits(wb_gpu_addr) & 0xffff; 4599 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, 4600 mqd->cp_hqd_pq_rptr_report_addr_lo); 4601 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4602 mqd->cp_hqd_pq_rptr_report_addr_hi); 4603 4604 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4605 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4606 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; 4607 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4608 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr); 4609 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 
4610 mqd->cp_hqd_pq_wptr_poll_addr_hi); 4611 4612 /* enable the doorbell if requested */ 4613 if (use_doorbell) { 4614 if ((adev->asic_type == CHIP_CARRIZO) || 4615 (adev->asic_type == CHIP_FIJI) || 4616 (adev->asic_type == CHIP_STONEY) || 4617 (adev->asic_type == CHIP_POLARIS11) || 4618 (adev->asic_type == CHIP_POLARIS10)) { 4619 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, 4620 AMDGPU_DOORBELL_KIQ << 2); 4621 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, 4622 AMDGPU_DOORBELL_MEC_RING7 << 2); 4623 } 4624 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4625 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4626 DOORBELL_OFFSET, ring->doorbell_index); 4627 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 4628 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); 4629 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); 4630 mqd->cp_hqd_pq_doorbell_control = tmp; 4631 4632 } else { 4633 mqd->cp_hqd_pq_doorbell_control = 0; 4634 } 4635 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 4636 mqd->cp_hqd_pq_doorbell_control); 4637 4638 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4639 ring->wptr = 0; 4640 mqd->cp_hqd_pq_wptr = ring->wptr; 4641 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 4642 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4643 4644 /* set the vmid for the queue */ 4645 mqd->cp_hqd_vmid = 0; 4646 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); 4647 4648 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4649 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4650 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); 4651 mqd->cp_hqd_persistent_state = tmp; 4652 if (adev->asic_type == CHIP_STONEY || 4653 adev->asic_type == CHIP_POLARIS11 || 4654 adev->asic_type == CHIP_POLARIS10) { 4655 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); 4656 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); 4657 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); 4658 } 4659 4660 /* activate the queue */ 4661 mqd->cp_hqd_active = 1; 
4662 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); 4663 4664 vi_srbm_select(adev, 0, 0, 0, 0); 4665 mutex_unlock(&adev->srbm_mutex); 4666 4667 amdgpu_bo_kunmap(ring->mqd_obj); 4668 amdgpu_bo_unreserve(ring->mqd_obj); 4669 } 4670 4671 if (use_doorbell) { 4672 tmp = RREG32(mmCP_PQ_STATUS); 4673 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4674 WREG32(mmCP_PQ_STATUS, tmp); 4675 } 4676 4677 gfx_v8_0_cp_compute_enable(adev, true); 4678 4679 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4680 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4681 4682 ring->ready = true; 4683 r = amdgpu_ring_test_ring(ring); 4684 if (r) 4685 ring->ready = false; 4686 } 4687 4688 return 0; 4689 } 4690 4691 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 4692 { 4693 int r; 4694 4695 if (!(adev->flags & AMD_IS_APU)) 4696 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4697 4698 if (!adev->pp_enabled) { 4699 if (!adev->firmware.smu_load) { 4700 /* legacy firmware loading */ 4701 r = gfx_v8_0_cp_gfx_load_microcode(adev); 4702 if (r) 4703 return r; 4704 4705 r = gfx_v8_0_cp_compute_load_microcode(adev); 4706 if (r) 4707 return r; 4708 } else { 4709 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 4710 AMDGPU_UCODE_ID_CP_CE); 4711 if (r) 4712 return -EINVAL; 4713 4714 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 4715 AMDGPU_UCODE_ID_CP_PFP); 4716 if (r) 4717 return -EINVAL; 4718 4719 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 4720 AMDGPU_UCODE_ID_CP_ME); 4721 if (r) 4722 return -EINVAL; 4723 4724 if (adev->asic_type == CHIP_TOPAZ) { 4725 r = gfx_v8_0_cp_compute_load_microcode(adev); 4726 if (r) 4727 return r; 4728 } else { 4729 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 4730 AMDGPU_UCODE_ID_CP_MEC1); 4731 if (r) 4732 return -EINVAL; 4733 } 4734 } 4735 } 4736 4737 r = gfx_v8_0_cp_gfx_resume(adev); 4738 if (r) 4739 return r; 4740 4741 r = gfx_v8_0_cp_compute_resume(adev); 4742 if (r) 4743 return r; 4744 4745 
gfx_v8_0_enable_gui_idle_interrupt(adev, true); 4746 4747 return 0; 4748 } 4749 4750 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 4751 { 4752 gfx_v8_0_cp_gfx_enable(adev, enable); 4753 gfx_v8_0_cp_compute_enable(adev, enable); 4754 } 4755 4756 static int gfx_v8_0_hw_init(void *handle) 4757 { 4758 int r; 4759 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4760 4761 gfx_v8_0_init_golden_registers(adev); 4762 4763 gfx_v8_0_gpu_init(adev); 4764 4765 r = gfx_v8_0_rlc_resume(adev); 4766 if (r) 4767 return r; 4768 4769 r = gfx_v8_0_cp_resume(adev); 4770 if (r) 4771 return r; 4772 4773 return r; 4774 } 4775 4776 static int gfx_v8_0_hw_fini(void *handle) 4777 { 4778 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4779 4780 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4781 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4782 gfx_v8_0_cp_enable(adev, false); 4783 gfx_v8_0_rlc_stop(adev); 4784 gfx_v8_0_cp_compute_fini(adev); 4785 4786 amdgpu_set_powergating_state(adev, 4787 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); 4788 4789 return 0; 4790 } 4791 4792 static int gfx_v8_0_suspend(void *handle) 4793 { 4794 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4795 4796 return gfx_v8_0_hw_fini(adev); 4797 } 4798 4799 static int gfx_v8_0_resume(void *handle) 4800 { 4801 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4802 4803 return gfx_v8_0_hw_init(adev); 4804 } 4805 4806 static bool gfx_v8_0_is_idle(void *handle) 4807 { 4808 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4809 4810 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 4811 return false; 4812 else 4813 return true; 4814 } 4815 4816 static int gfx_v8_0_wait_for_idle(void *handle) 4817 { 4818 unsigned i; 4819 u32 tmp; 4820 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4821 4822 for (i = 0; i < adev->usec_timeout; i++) { 4823 /* read MC_STATUS */ 4824 tmp = RREG32(mmGRBM_STATUS) & 
GRBM_STATUS__GUI_ACTIVE_MASK; 4825 4826 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 4827 return 0; 4828 udelay(1); 4829 } 4830 return -ETIMEDOUT; 4831 } 4832 4833 static int gfx_v8_0_soft_reset(void *handle) 4834 { 4835 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 4836 u32 tmp; 4837 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4838 4839 /* GRBM_STATUS */ 4840 tmp = RREG32(mmGRBM_STATUS); 4841 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4842 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4843 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4844 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4845 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4846 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 4847 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4848 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4849 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4850 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4851 } 4852 4853 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4854 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4855 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4856 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 4857 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 4858 } 4859 4860 /* GRBM_STATUS2 */ 4861 tmp = RREG32(mmGRBM_STATUS2); 4862 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 4863 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4864 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4865 4866 /* SRBM_STATUS */ 4867 tmp = RREG32(mmSRBM_STATUS); 4868 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 4869 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 4870 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 4871 4872 if (grbm_soft_reset || srbm_soft_reset) { 4873 /* stop the rlc */ 4874 gfx_v8_0_rlc_stop(adev); 4875 4876 /* Disable GFX parsing/prefetching */ 4877 gfx_v8_0_cp_gfx_enable(adev, false); 4878 4879 /* Disable MEC parsing/prefetching */ 4880 
gfx_v8_0_cp_compute_enable(adev, false); 4881 4882 if (grbm_soft_reset || srbm_soft_reset) { 4883 tmp = RREG32(mmGMCON_DEBUG); 4884 tmp = REG_SET_FIELD(tmp, 4885 GMCON_DEBUG, GFX_STALL, 1); 4886 tmp = REG_SET_FIELD(tmp, 4887 GMCON_DEBUG, GFX_CLEAR, 1); 4888 WREG32(mmGMCON_DEBUG, tmp); 4889 4890 udelay(50); 4891 } 4892 4893 if (grbm_soft_reset) { 4894 tmp = RREG32(mmGRBM_SOFT_RESET); 4895 tmp |= grbm_soft_reset; 4896 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4897 WREG32(mmGRBM_SOFT_RESET, tmp); 4898 tmp = RREG32(mmGRBM_SOFT_RESET); 4899 4900 udelay(50); 4901 4902 tmp &= ~grbm_soft_reset; 4903 WREG32(mmGRBM_SOFT_RESET, tmp); 4904 tmp = RREG32(mmGRBM_SOFT_RESET); 4905 } 4906 4907 if (srbm_soft_reset) { 4908 tmp = RREG32(mmSRBM_SOFT_RESET); 4909 tmp |= srbm_soft_reset; 4910 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 4911 WREG32(mmSRBM_SOFT_RESET, tmp); 4912 tmp = RREG32(mmSRBM_SOFT_RESET); 4913 4914 udelay(50); 4915 4916 tmp &= ~srbm_soft_reset; 4917 WREG32(mmSRBM_SOFT_RESET, tmp); 4918 tmp = RREG32(mmSRBM_SOFT_RESET); 4919 } 4920 4921 if (grbm_soft_reset || srbm_soft_reset) { 4922 tmp = RREG32(mmGMCON_DEBUG); 4923 tmp = REG_SET_FIELD(tmp, 4924 GMCON_DEBUG, GFX_STALL, 0); 4925 tmp = REG_SET_FIELD(tmp, 4926 GMCON_DEBUG, GFX_CLEAR, 0); 4927 WREG32(mmGMCON_DEBUG, tmp); 4928 } 4929 4930 /* Wait a little for things to settle down */ 4931 udelay(50); 4932 } 4933 return 0; 4934 } 4935 4936 /** 4937 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 4938 * 4939 * @adev: amdgpu_device pointer 4940 * 4941 * Fetches a GPU clock counter snapshot. 4942 * Returns the 64 bit clock counter snapshot. 
4943 */ 4944 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4945 { 4946 uint64_t clock; 4947 4948 mutex_lock(&adev->gfx.gpu_clock_mutex); 4949 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4950 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 4951 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4952 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4953 return clock; 4954 } 4955 4956 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4957 uint32_t vmid, 4958 uint32_t gds_base, uint32_t gds_size, 4959 uint32_t gws_base, uint32_t gws_size, 4960 uint32_t oa_base, uint32_t oa_size) 4961 { 4962 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 4963 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 4964 4965 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 4966 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 4967 4968 oa_base = oa_base >> AMDGPU_OA_SHIFT; 4969 oa_size = oa_size >> AMDGPU_OA_SHIFT; 4970 4971 /* GDS Base */ 4972 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4973 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4974 WRITE_DATA_DST_SEL(0))); 4975 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 4976 amdgpu_ring_write(ring, 0); 4977 amdgpu_ring_write(ring, gds_base); 4978 4979 /* GDS Size */ 4980 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4981 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4982 WRITE_DATA_DST_SEL(0))); 4983 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 4984 amdgpu_ring_write(ring, 0); 4985 amdgpu_ring_write(ring, gds_size); 4986 4987 /* GWS */ 4988 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4989 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4990 WRITE_DATA_DST_SEL(0))); 4991 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 4992 amdgpu_ring_write(ring, 0); 4993 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4994 4995 /* OA */ 4996 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4997 amdgpu_ring_write(ring, 
(WRITE_DATA_ENGINE_SEL(0) | 4998 WRITE_DATA_DST_SEL(0))); 4999 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5000 amdgpu_ring_write(ring, 0); 5001 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5002 } 5003 5004 static int gfx_v8_0_early_init(void *handle) 5005 { 5006 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5007 5008 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5009 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS; 5010 gfx_v8_0_set_ring_funcs(adev); 5011 gfx_v8_0_set_irq_funcs(adev); 5012 gfx_v8_0_set_gds_init(adev); 5013 gfx_v8_0_set_rlc_funcs(adev); 5014 5015 return 0; 5016 } 5017 5018 static int gfx_v8_0_late_init(void *handle) 5019 { 5020 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5021 int r; 5022 5023 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5024 if (r) 5025 return r; 5026 5027 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5028 if (r) 5029 return r; 5030 5031 /* requires IBs so do in late init after IB pool is initialized */ 5032 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5033 if (r) 5034 return r; 5035 5036 amdgpu_set_powergating_state(adev, 5037 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); 5038 5039 return 0; 5040 } 5041 5042 static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5043 bool enable) 5044 { 5045 uint32_t data, temp; 5046 5047 /* Send msg to SMU via Powerplay */ 5048 amdgpu_set_powergating_state(adev, 5049 AMD_IP_BLOCK_TYPE_SMC, 5050 enable ? 
AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); 5051 5052 if (enable) { 5053 /* Enable static MGPG */ 5054 temp = data = RREG32(mmRLC_PG_CNTL); 5055 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; 5056 5057 if (temp != data) 5058 WREG32(mmRLC_PG_CNTL, data); 5059 } else { 5060 temp = data = RREG32(mmRLC_PG_CNTL); 5061 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; 5062 5063 if (temp != data) 5064 WREG32(mmRLC_PG_CNTL, data); 5065 } 5066 } 5067 5068 static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5069 bool enable) 5070 { 5071 uint32_t data, temp; 5072 5073 if (enable) { 5074 /* Enable dynamic MGPG */ 5075 temp = data = RREG32(mmRLC_PG_CNTL); 5076 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; 5077 5078 if (temp != data) 5079 WREG32(mmRLC_PG_CNTL, data); 5080 } else { 5081 temp = data = RREG32(mmRLC_PG_CNTL); 5082 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; 5083 5084 if (temp != data) 5085 WREG32(mmRLC_PG_CNTL, data); 5086 } 5087 } 5088 5089 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5090 bool enable) 5091 { 5092 uint32_t data, temp; 5093 5094 if (enable) { 5095 /* Enable quick PG */ 5096 temp = data = RREG32(mmRLC_PG_CNTL); 5097 data |= 0x100000; 5098 5099 if (temp != data) 5100 WREG32(mmRLC_PG_CNTL, data); 5101 } else { 5102 temp = data = RREG32(mmRLC_PG_CNTL); 5103 data &= ~0x100000; 5104 5105 if (temp != data) 5106 WREG32(mmRLC_PG_CNTL, data); 5107 } 5108 } 5109 5110 static int gfx_v8_0_set_powergating_state(void *handle, 5111 enum amd_powergating_state state) 5112 { 5113 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5114 5115 if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 5116 return 0; 5117 5118 switch (adev->asic_type) { 5119 case CHIP_POLARIS11: 5120 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) 5121 polaris11_enable_gfx_static_mg_power_gating(adev, 5122 state == AMD_PG_STATE_GATE ? 
true : false); 5123 else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) 5124 polaris11_enable_gfx_dynamic_mg_power_gating(adev, 5125 state == AMD_PG_STATE_GATE ? true : false); 5126 else 5127 polaris11_enable_gfx_quick_mg_power_gating(adev, 5128 state == AMD_PG_STATE_GATE ? true : false); 5129 break; 5130 default: 5131 break; 5132 } 5133 5134 return 0; 5135 } 5136 5137 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5138 uint32_t reg_addr, uint32_t cmd) 5139 { 5140 uint32_t data; 5141 5142 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); 5143 5144 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5145 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5146 5147 data = RREG32(mmRLC_SERDES_WR_CTRL); 5148 if (adev->asic_type == CHIP_STONEY) 5149 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5150 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5151 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5152 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5153 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5154 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5155 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5156 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5157 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5158 else 5159 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5160 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5161 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5162 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5163 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5164 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5165 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5166 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5167 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5168 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5169 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5170 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5171 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5172 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5173 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5174 5175 WREG32(mmRLC_SERDES_WR_CTRL, data); 5176 } 5177 5178 #define MSG_ENTER_RLC_SAFE_MODE 1 5179 
#define MSG_EXIT_RLC_SAFE_MODE 0 5180 5181 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5182 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5183 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5184 5185 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev) 5186 { 5187 u32 data = 0; 5188 unsigned i; 5189 5190 data = RREG32(mmRLC_CNTL); 5191 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) 5192 return; 5193 5194 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || 5195 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | 5196 AMD_PG_SUPPORT_GFX_DMG))) { 5197 data |= RLC_GPR_REG2__REQ_MASK; 5198 data &= ~RLC_GPR_REG2__MESSAGE_MASK; 5199 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); 5200 WREG32(mmRLC_GPR_REG2, data); 5201 5202 for (i = 0; i < adev->usec_timeout; i++) { 5203 if ((RREG32(mmRLC_GPM_STAT) & 5204 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5205 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5206 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5207 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5208 break; 5209 udelay(1); 5210 } 5211 5212 for (i = 0; i < adev->usec_timeout; i++) { 5213 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0) 5214 break; 5215 udelay(1); 5216 } 5217 adev->gfx.rlc.in_safe_mode = true; 5218 } 5219 } 5220 5221 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev) 5222 { 5223 u32 data; 5224 unsigned i; 5225 5226 data = RREG32(mmRLC_CNTL); 5227 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) 5228 return; 5229 5230 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || 5231 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | 5232 AMD_PG_SUPPORT_GFX_DMG))) { 5233 data |= RLC_GPR_REG2__REQ_MASK; 5234 data &= ~RLC_GPR_REG2__MESSAGE_MASK; 5235 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); 5236 WREG32(mmRLC_GPR_REG2, data); 5237 adev->gfx.rlc.in_safe_mode = false; 5238 } 5239 5240 for (i = 0; i < adev->usec_timeout; i++) { 5241 if 
((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0) 5242 break; 5243 udelay(1); 5244 } 5245 } 5246 5247 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5248 { 5249 u32 data; 5250 unsigned i; 5251 5252 data = RREG32(mmRLC_CNTL); 5253 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5254 return; 5255 5256 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5257 data |= RLC_SAFE_MODE__CMD_MASK; 5258 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5259 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5260 WREG32(mmRLC_SAFE_MODE, data); 5261 5262 for (i = 0; i < adev->usec_timeout; i++) { 5263 if ((RREG32(mmRLC_GPM_STAT) & 5264 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5265 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5266 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5267 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5268 break; 5269 udelay(1); 5270 } 5271 5272 for (i = 0; i < adev->usec_timeout; i++) { 5273 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0) 5274 break; 5275 udelay(1); 5276 } 5277 adev->gfx.rlc.in_safe_mode = true; 5278 } 5279 } 5280 5281 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5282 { 5283 u32 data = 0; 5284 unsigned i; 5285 5286 data = RREG32(mmRLC_CNTL); 5287 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5288 return; 5289 5290 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5291 if (adev->gfx.rlc.in_safe_mode) { 5292 data |= RLC_SAFE_MODE__CMD_MASK; 5293 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5294 WREG32(mmRLC_SAFE_MODE, data); 5295 adev->gfx.rlc.in_safe_mode = false; 5296 } 5297 } 5298 5299 for (i = 0; i < adev->usec_timeout; i++) { 5300 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0) 5301 break; 5302 udelay(1); 5303 } 5304 } 5305 5306 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev) 5307 { 5308 adev->gfx.rlc.in_safe_mode = true; 5309 } 5310 5311 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev) 5312 { 5313 
adev->gfx.rlc.in_safe_mode = false; 5314 } 5315 5316 static const struct amdgpu_rlc_funcs cz_rlc_funcs = { 5317 .enter_safe_mode = cz_enter_rlc_safe_mode, 5318 .exit_safe_mode = cz_exit_rlc_safe_mode 5319 }; 5320 5321 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5322 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5323 .exit_safe_mode = iceland_exit_rlc_safe_mode 5324 }; 5325 5326 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = { 5327 .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode, 5328 .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode 5329 }; 5330 5331 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5332 bool enable) 5333 { 5334 uint32_t temp, data; 5335 5336 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5337 5338 /* It is disabled by HW by default */ 5339 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5340 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5341 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 5342 /* 1 - RLC memory Light sleep */ 5343 temp = data = RREG32(mmRLC_MEM_SLP_CNTL); 5344 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5345 if (temp != data) 5346 WREG32(mmRLC_MEM_SLP_CNTL, data); 5347 } 5348 5349 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 5350 /* 2 - CP memory Light sleep */ 5351 temp = data = RREG32(mmCP_MEM_SLP_CNTL); 5352 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5353 if (temp != data) 5354 WREG32(mmCP_MEM_SLP_CNTL, data); 5355 } 5356 } 5357 5358 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5359 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5360 if (adev->flags & AMD_IS_APU) 5361 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5362 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5363 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5364 else 5365 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5366 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5367 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5368 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5369 5370 if (temp != data) 5371 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5372 5373 
/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5374 gfx_v8_0_wait_for_rlc_serdes(adev); 5375 5376 /* 5 - clear mgcg override */ 5377 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5378 5379 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5380 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 5381 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5382 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5383 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5384 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 5385 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 5386 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 5387 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 5388 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 5389 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 5390 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 5391 if (temp != data) 5392 WREG32(mmCGTS_SM_CTRL_REG, data); 5393 } 5394 udelay(50); 5395 5396 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5397 gfx_v8_0_wait_for_rlc_serdes(adev); 5398 } else { 5399 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 5400 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5401 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5402 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5403 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5404 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5405 if (temp != data) 5406 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5407 5408 /* 2 - disable MGLS in RLC */ 5409 data = RREG32(mmRLC_MEM_SLP_CNTL); 5410 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5411 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5412 WREG32(mmRLC_MEM_SLP_CNTL, data); 5413 } 5414 5415 /* 3 - disable MGLS in CP */ 5416 data = RREG32(mmCP_MEM_SLP_CNTL); 5417 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5418 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5419 WREG32(mmCP_MEM_SLP_CNTL, data); 5420 } 5421 5422 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 5423 temp = data = 
RREG32(mmCGTS_SM_CTRL_REG); 5424 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 5425 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 5426 if (temp != data) 5427 WREG32(mmCGTS_SM_CTRL_REG, data); 5428 5429 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5430 gfx_v8_0_wait_for_rlc_serdes(adev); 5431 5432 /* 6 - set mgcg override */ 5433 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5434 5435 udelay(50); 5436 5437 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5438 gfx_v8_0_wait_for_rlc_serdes(adev); 5439 } 5440 5441 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5442 } 5443 5444 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5445 bool enable) 5446 { 5447 uint32_t temp, temp1, data, data1; 5448 5449 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5450 5451 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5452 5453 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5454 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/ 5455 * Cmp_busy/GFX_Idle interrupts 5456 */ 5457 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5458 5459 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5460 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 5461 if (temp1 != data1) 5462 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5463 5464 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5465 gfx_v8_0_wait_for_rlc_serdes(adev); 5466 5467 /* 3 - clear cgcg override */ 5468 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5469 5470 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5471 gfx_v8_0_wait_for_rlc_serdes(adev); 5472 5473 /* 4 - write cmd to set CGLS */ 5474 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 5475 5476 /* 5 - enable cgcg */ 5477 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5478 5479 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5480 /* enable cgls*/ 5481 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5482 5483 temp1 
= data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5484 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 5485 5486 if (temp1 != data1) 5487 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5488 } else { 5489 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5490 } 5491 5492 if (temp != data) 5493 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5494 } else { 5495 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 5496 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5497 5498 /* TEST CGCG */ 5499 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5500 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 5501 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 5502 if (temp1 != data1) 5503 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5504 5505 /* read gfx register to wake up cgcg */ 5506 RREG32(mmCB_CGTT_SCLK_CTRL); 5507 RREG32(mmCB_CGTT_SCLK_CTRL); 5508 RREG32(mmCB_CGTT_SCLK_CTRL); 5509 RREG32(mmCB_CGTT_SCLK_CTRL); 5510 5511 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5512 gfx_v8_0_wait_for_rlc_serdes(adev); 5513 5514 /* write cmd to Set CGCG Overrride */ 5515 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5516 5517 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5518 gfx_v8_0_wait_for_rlc_serdes(adev); 5519 5520 /* write cmd to Clear CGLS */ 5521 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 5522 5523 /* disable cgcg, cgls should be disabled too. 
*/ 5524 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 5525 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5526 if (temp != data) 5527 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5528 } 5529 5530 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5531 } 5532 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5533 bool enable) 5534 { 5535 if (enable) { 5536 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 5537 * === MGCG + MGLS + TS(CG/LS) === 5538 */ 5539 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5540 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5541 } else { 5542 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 5543 * === CGCG + CGLS === 5544 */ 5545 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5546 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5547 } 5548 return 0; 5549 } 5550 5551 static int gfx_v8_0_set_clockgating_state(void *handle, 5552 enum amd_clockgating_state state) 5553 { 5554 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5555 5556 switch (adev->asic_type) { 5557 case CHIP_FIJI: 5558 case CHIP_CARRIZO: 5559 case CHIP_STONEY: 5560 gfx_v8_0_update_gfx_clock_gating(adev, 5561 state == AMD_CG_STATE_GATE ? 
true : false); 5562 break; 5563 default: 5564 break; 5565 } 5566 return 0; 5567 } 5568 5569 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5570 { 5571 u32 rptr; 5572 5573 rptr = ring->adev->wb.wb[ring->rptr_offs]; 5574 5575 return rptr; 5576 } 5577 5578 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5579 { 5580 struct amdgpu_device *adev = ring->adev; 5581 u32 wptr; 5582 5583 if (ring->use_doorbell) 5584 /* XXX check if swapping is necessary on BE */ 5585 wptr = ring->adev->wb.wb[ring->wptr_offs]; 5586 else 5587 wptr = RREG32(mmCP_RB0_WPTR); 5588 5589 return wptr; 5590 } 5591 5592 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5593 { 5594 struct amdgpu_device *adev = ring->adev; 5595 5596 if (ring->use_doorbell) { 5597 /* XXX check if swapping is necessary on BE */ 5598 adev->wb.wb[ring->wptr_offs] = ring->wptr; 5599 WDOORBELL32(ring->doorbell_index, ring->wptr); 5600 } else { 5601 WREG32(mmCP_RB0_WPTR, ring->wptr); 5602 (void)RREG32(mmCP_RB0_WPTR); 5603 } 5604 } 5605 5606 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5607 { 5608 u32 ref_and_mask, reg_mem_engine; 5609 5610 if (ring->type == AMDGPU_RING_TYPE_COMPUTE) { 5611 switch (ring->me) { 5612 case 1: 5613 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 5614 break; 5615 case 2: 5616 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 5617 break; 5618 default: 5619 return; 5620 } 5621 reg_mem_engine = 0; 5622 } else { 5623 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 5624 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 5625 } 5626 5627 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 5628 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 5629 WAIT_REG_MEM_FUNCTION(3) | /* == */ 5630 reg_mem_engine)); 5631 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 5632 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 5633 amdgpu_ring_write(ring, ref_and_mask); 5634 amdgpu_ring_write(ring, 
ref_and_mask); 5635 amdgpu_ring_write(ring, 0x20); /* poll interval */ 5636 } 5637 5638 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) 5639 { 5640 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5641 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5642 WRITE_DATA_DST_SEL(0) | 5643 WR_CONFIRM)); 5644 amdgpu_ring_write(ring, mmHDP_DEBUG0); 5645 amdgpu_ring_write(ring, 0); 5646 amdgpu_ring_write(ring, 1); 5647 5648 } 5649 5650 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5651 struct amdgpu_ib *ib, 5652 unsigned vm_id, bool ctx_switch) 5653 { 5654 u32 header, control = 0; 5655 u32 next_rptr = ring->wptr + 5; 5656 5657 if (ctx_switch) 5658 next_rptr += 2; 5659 5660 next_rptr += 4; 5661 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5662 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 5663 amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); 5664 amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); 5665 amdgpu_ring_write(ring, next_rptr); 5666 5667 /* insert SWITCH_BUFFER packet before first IB in the ring frame */ 5668 if (ctx_switch) { 5669 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5670 amdgpu_ring_write(ring, 0); 5671 } 5672 5673 if (ib->flags & AMDGPU_IB_FLAG_CE) 5674 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5675 else 5676 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5677 5678 control |= ib->length_dw | (vm_id << 24); 5679 5680 amdgpu_ring_write(ring, header); 5681 amdgpu_ring_write(ring, 5682 #ifdef __BIG_ENDIAN 5683 (2 << 0) | 5684 #endif 5685 (ib->gpu_addr & 0xFFFFFFFC)); 5686 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 5687 amdgpu_ring_write(ring, control); 5688 } 5689 5690 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5691 struct amdgpu_ib *ib, 5692 unsigned vm_id, bool ctx_switch) 5693 { 5694 u32 header, control = 0; 5695 u32 next_rptr = ring->wptr + 5; 5696 5697 control |= 
INDIRECT_BUFFER_VALID; 5698 5699 next_rptr += 4; 5700 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5701 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 5702 amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); 5703 amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); 5704 amdgpu_ring_write(ring, next_rptr); 5705 5706 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5707 5708 control |= ib->length_dw | (vm_id << 24); 5709 5710 amdgpu_ring_write(ring, header); 5711 amdgpu_ring_write(ring, 5712 #ifdef __BIG_ENDIAN 5713 (2 << 0) | 5714 #endif 5715 (ib->gpu_addr & 0xFFFFFFFC)); 5716 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 5717 amdgpu_ring_write(ring, control); 5718 } 5719 5720 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, 5721 u64 seq, unsigned flags) 5722 { 5723 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5724 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5725 5726 /* EVENT_WRITE_EOP - flush caches, send int */ 5727 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 5728 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 5729 EOP_TC_ACTION_EN | 5730 EOP_TC_WB_ACTION_EN | 5731 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5732 EVENT_INDEX(5))); 5733 amdgpu_ring_write(ring, addr & 0xfffffffc); 5734 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 5735 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 5736 amdgpu_ring_write(ring, lower_32_bits(seq)); 5737 amdgpu_ring_write(ring, upper_32_bits(seq)); 5738 5739 } 5740 5741 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5742 { 5743 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); 5744 uint32_t seq = ring->fence_drv.sync_seq; 5745 uint64_t addr = ring->fence_drv.gpu_addr; 5746 5747 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 5748 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ 5749 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 5750 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ 5751 amdgpu_ring_write(ring, addr & 0xfffffffc); 5752 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 5753 amdgpu_ring_write(ring, seq); 5754 amdgpu_ring_write(ring, 0xffffffff); 5755 amdgpu_ring_write(ring, 4); /* poll interval */ 5756 5757 if (usepfp) { 5758 /* synce CE with ME to prevent CE fetch CEIB before context switch done */ 5759 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5760 amdgpu_ring_write(ring, 0); 5761 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5762 amdgpu_ring_write(ring, 0); 5763 } 5764 } 5765 5766 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5767 unsigned vm_id, uint64_t pd_addr) 5768 { 5769 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); 5770 5771 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5772 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 5773 WRITE_DATA_DST_SEL(0)) | 5774 WR_CONFIRM); 5775 if (vm_id < 8) { 5776 amdgpu_ring_write(ring, 5777 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); 5778 } else { 5779 amdgpu_ring_write(ring, 5780 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); 5781 } 5782 amdgpu_ring_write(ring, 0); 5783 amdgpu_ring_write(ring, pd_addr >> 12); 5784 5785 /* bits 0-15 are the VM contexts0-15 */ 5786 /* invalidate the cache */ 5787 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5788 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5789 
WRITE_DATA_DST_SEL(0))); 5790 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 5791 amdgpu_ring_write(ring, 0); 5792 amdgpu_ring_write(ring, 1 << vm_id); 5793 5794 /* wait for the invalidate to complete */ 5795 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 5796 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ 5797 WAIT_REG_MEM_FUNCTION(0) | /* always */ 5798 WAIT_REG_MEM_ENGINE(0))); /* me */ 5799 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 5800 amdgpu_ring_write(ring, 0); 5801 amdgpu_ring_write(ring, 0); /* ref */ 5802 amdgpu_ring_write(ring, 0); /* mask */ 5803 amdgpu_ring_write(ring, 0x20); /* poll interval */ 5804 5805 /* compute doesn't have PFP */ 5806 if (usepfp) { 5807 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5808 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5809 amdgpu_ring_write(ring, 0x0); 5810 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5811 amdgpu_ring_write(ring, 0); 5812 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5813 amdgpu_ring_write(ring, 0); 5814 } 5815 } 5816 5817 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5818 { 5819 return ring->adev->wb.wb[ring->rptr_offs]; 5820 } 5821 5822 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5823 { 5824 return ring->adev->wb.wb[ring->wptr_offs]; 5825 } 5826 5827 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5828 { 5829 struct amdgpu_device *adev = ring->adev; 5830 5831 /* XXX check if swapping is necessary on BE */ 5832 adev->wb.wb[ring->wptr_offs] = ring->wptr; 5833 WDOORBELL32(ring->doorbell_index, ring->wptr); 5834 } 5835 5836 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, 5837 u64 addr, u64 seq, 5838 unsigned flags) 5839 { 5840 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5841 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5842 5843 /* RELEASE_MEM - flush caches, send int */ 5844 amdgpu_ring_write(ring, 
PACKET3(PACKET3_RELEASE_MEM, 5)); 5845 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 5846 EOP_TC_ACTION_EN | 5847 EOP_TC_WB_ACTION_EN | 5848 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5849 EVENT_INDEX(5))); 5850 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 5851 amdgpu_ring_write(ring, addr & 0xfffffffc); 5852 amdgpu_ring_write(ring, upper_32_bits(addr)); 5853 amdgpu_ring_write(ring, lower_32_bits(seq)); 5854 amdgpu_ring_write(ring, upper_32_bits(seq)); 5855 } 5856 5857 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5858 enum amdgpu_interrupt_state state) 5859 { 5860 u32 cp_int_cntl; 5861 5862 switch (state) { 5863 case AMDGPU_IRQ_STATE_DISABLE: 5864 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 5865 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5866 TIME_STAMP_INT_ENABLE, 0); 5867 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 5868 break; 5869 case AMDGPU_IRQ_STATE_ENABLE: 5870 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 5871 cp_int_cntl = 5872 REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5873 TIME_STAMP_INT_ENABLE, 1); 5874 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 5875 break; 5876 default: 5877 break; 5878 } 5879 } 5880 5881 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5882 int me, int pipe, 5883 enum amdgpu_interrupt_state state) 5884 { 5885 u32 mec_int_cntl, mec_int_cntl_reg; 5886 5887 /* 5888 * amdgpu controls only pipe 0 of MEC1. That's why this function only 5889 * handles the setting of interrupts for this specific pipe. All other 5890 * pipes' interrupts are set by amdkfd. 
5891 */ 5892 5893 if (me == 1) { 5894 switch (pipe) { 5895 case 0: 5896 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; 5897 break; 5898 default: 5899 DRM_DEBUG("invalid pipe %d\n", pipe); 5900 return; 5901 } 5902 } else { 5903 DRM_DEBUG("invalid me %d\n", me); 5904 return; 5905 } 5906 5907 switch (state) { 5908 case AMDGPU_IRQ_STATE_DISABLE: 5909 mec_int_cntl = RREG32(mec_int_cntl_reg); 5910 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5911 TIME_STAMP_INT_ENABLE, 0); 5912 WREG32(mec_int_cntl_reg, mec_int_cntl); 5913 break; 5914 case AMDGPU_IRQ_STATE_ENABLE: 5915 mec_int_cntl = RREG32(mec_int_cntl_reg); 5916 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5917 TIME_STAMP_INT_ENABLE, 1); 5918 WREG32(mec_int_cntl_reg, mec_int_cntl); 5919 break; 5920 default: 5921 break; 5922 } 5923 } 5924 5925 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5926 struct amdgpu_irq_src *source, 5927 unsigned type, 5928 enum amdgpu_interrupt_state state) 5929 { 5930 u32 cp_int_cntl; 5931 5932 switch (state) { 5933 case AMDGPU_IRQ_STATE_DISABLE: 5934 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 5935 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5936 PRIV_REG_INT_ENABLE, 0); 5937 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 5938 break; 5939 case AMDGPU_IRQ_STATE_ENABLE: 5940 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 5941 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5942 PRIV_REG_INT_ENABLE, 1); 5943 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 5944 break; 5945 default: 5946 break; 5947 } 5948 5949 return 0; 5950 } 5951 5952 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5953 struct amdgpu_irq_src *source, 5954 unsigned type, 5955 enum amdgpu_interrupt_state state) 5956 { 5957 u32 cp_int_cntl; 5958 5959 switch (state) { 5960 case AMDGPU_IRQ_STATE_DISABLE: 5961 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 5962 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5963 
PRIV_INSTR_INT_ENABLE, 0); 5964 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 5965 break; 5966 case AMDGPU_IRQ_STATE_ENABLE: 5967 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 5968 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5969 PRIV_INSTR_INT_ENABLE, 1); 5970 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 5971 break; 5972 default: 5973 break; 5974 } 5975 5976 return 0; 5977 } 5978 5979 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5980 struct amdgpu_irq_src *src, 5981 unsigned type, 5982 enum amdgpu_interrupt_state state) 5983 { 5984 switch (type) { 5985 case AMDGPU_CP_IRQ_GFX_EOP: 5986 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state); 5987 break; 5988 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5989 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5990 break; 5991 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5992 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5993 break; 5994 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5995 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5996 break; 5997 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5998 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5999 break; 6000 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6001 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6002 break; 6003 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6004 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6005 break; 6006 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6007 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6008 break; 6009 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6010 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6011 break; 6012 default: 6013 break; 6014 } 6015 return 0; 6016 } 6017 6018 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, 6019 struct amdgpu_irq_src *source, 6020 struct amdgpu_iv_entry *entry) 6021 { 6022 int i; 6023 u8 me_id, pipe_id, queue_id; 6024 struct amdgpu_ring *ring; 6025 6026 DRM_DEBUG("IH: 
CP EOP\n"); 6027 me_id = (entry->ring_id & 0x0c) >> 2; 6028 pipe_id = (entry->ring_id & 0x03) >> 0; 6029 queue_id = (entry->ring_id & 0x70) >> 4; 6030 6031 switch (me_id) { 6032 case 0: 6033 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6034 break; 6035 case 1: 6036 case 2: 6037 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6038 ring = &adev->gfx.compute_ring[i]; 6039 /* Per-queue interrupt is supported for MEC starting from VI. 6040 * The interrupt can only be enabled/disabled per pipe instead of per queue. 6041 */ 6042 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6043 amdgpu_fence_process(ring); 6044 } 6045 break; 6046 } 6047 return 0; 6048 } 6049 6050 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, 6051 struct amdgpu_irq_src *source, 6052 struct amdgpu_iv_entry *entry) 6053 { 6054 DRM_ERROR("Illegal register access in command stream\n"); 6055 schedule_work(&adev->reset_work); 6056 return 0; 6057 } 6058 6059 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, 6060 struct amdgpu_irq_src *source, 6061 struct amdgpu_iv_entry *entry) 6062 { 6063 DRM_ERROR("Illegal instruction in command stream\n"); 6064 schedule_work(&adev->reset_work); 6065 return 0; 6066 } 6067 6068 const struct amd_ip_funcs gfx_v8_0_ip_funcs = { 6069 .name = "gfx_v8_0", 6070 .early_init = gfx_v8_0_early_init, 6071 .late_init = gfx_v8_0_late_init, 6072 .sw_init = gfx_v8_0_sw_init, 6073 .sw_fini = gfx_v8_0_sw_fini, 6074 .hw_init = gfx_v8_0_hw_init, 6075 .hw_fini = gfx_v8_0_hw_fini, 6076 .suspend = gfx_v8_0_suspend, 6077 .resume = gfx_v8_0_resume, 6078 .is_idle = gfx_v8_0_is_idle, 6079 .wait_for_idle = gfx_v8_0_wait_for_idle, 6080 .soft_reset = gfx_v8_0_soft_reset, 6081 .set_clockgating_state = gfx_v8_0_set_clockgating_state, 6082 .set_powergating_state = gfx_v8_0_set_powergating_state, 6083 }; 6084 6085 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 6086 .get_rptr = gfx_v8_0_ring_get_rptr_gfx, 6087 .get_wptr = 
gfx_v8_0_ring_get_wptr_gfx, 6088 .set_wptr = gfx_v8_0_ring_set_wptr_gfx, 6089 .parse_cs = NULL, 6090 .emit_ib = gfx_v8_0_ring_emit_ib_gfx, 6091 .emit_fence = gfx_v8_0_ring_emit_fence_gfx, 6092 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 6093 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 6094 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 6095 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 6096 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, 6097 .test_ring = gfx_v8_0_ring_test_ring, 6098 .test_ib = gfx_v8_0_ring_test_ib, 6099 .insert_nop = amdgpu_ring_insert_nop, 6100 .pad_ib = amdgpu_ring_generic_pad_ib, 6101 }; 6102 6103 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 6104 .get_rptr = gfx_v8_0_ring_get_rptr_compute, 6105 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 6106 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 6107 .parse_cs = NULL, 6108 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 6109 .emit_fence = gfx_v8_0_ring_emit_fence_compute, 6110 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 6111 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 6112 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 6113 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 6114 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, 6115 .test_ring = gfx_v8_0_ring_test_ring, 6116 .test_ib = gfx_v8_0_ring_test_ib, 6117 .insert_nop = amdgpu_ring_insert_nop, 6118 .pad_ib = amdgpu_ring_generic_pad_ib, 6119 }; 6120 6121 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) 6122 { 6123 int i; 6124 6125 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 6126 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx; 6127 6128 for (i = 0; i < adev->gfx.num_compute_rings; i++) 6129 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute; 6130 } 6131 6132 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = { 6133 .set = gfx_v8_0_set_eop_interrupt_state, 6134 .process = gfx_v8_0_eop_irq, 6135 }; 6136 6137 static const struct 
amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = { 6138 .set = gfx_v8_0_set_priv_reg_fault_state, 6139 .process = gfx_v8_0_priv_reg_irq, 6140 }; 6141 6142 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = { 6143 .set = gfx_v8_0_set_priv_inst_fault_state, 6144 .process = gfx_v8_0_priv_inst_irq, 6145 }; 6146 6147 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) 6148 { 6149 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 6150 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs; 6151 6152 adev->gfx.priv_reg_irq.num_types = 1; 6153 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs; 6154 6155 adev->gfx.priv_inst_irq.num_types = 1; 6156 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; 6157 } 6158 6159 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) 6160 { 6161 switch (adev->asic_type) { 6162 case CHIP_TOPAZ: 6163 case CHIP_STONEY: 6164 adev->gfx.rlc.funcs = &iceland_rlc_funcs; 6165 break; 6166 case CHIP_CARRIZO: 6167 adev->gfx.rlc.funcs = &cz_rlc_funcs; 6168 break; 6169 default: 6170 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs; 6171 break; 6172 } 6173 } 6174 6175 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) 6176 { 6177 /* init asci gds info */ 6178 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); 6179 adev->gds.gws.total_size = 64; 6180 adev->gds.oa.total_size = 16; 6181 6182 if (adev->gds.mem.total_size == 64 * 1024) { 6183 adev->gds.mem.gfx_partition_size = 4096; 6184 adev->gds.mem.cs_partition_size = 4096; 6185 6186 adev->gds.gws.gfx_partition_size = 4; 6187 adev->gds.gws.cs_partition_size = 4; 6188 6189 adev->gds.oa.gfx_partition_size = 4; 6190 adev->gds.oa.cs_partition_size = 1; 6191 } else { 6192 adev->gds.mem.gfx_partition_size = 1024; 6193 adev->gds.mem.cs_partition_size = 1024; 6194 6195 adev->gds.gws.gfx_partition_size = 16; 6196 adev->gds.gws.cs_partition_size = 16; 6197 6198 adev->gds.oa.gfx_partition_size = 4; 6199 adev->gds.oa.cs_partition_size = 4; 6200 } 
6201 } 6202 6203 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) 6204 { 6205 u32 data, mask; 6206 6207 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); 6208 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); 6209 6210 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 6211 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 6212 6213 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh); 6214 6215 return (~data) & mask; 6216 } 6217 6218 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) 6219 { 6220 int i, j, k, counter, active_cu_number = 0; 6221 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 6222 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 6223 6224 memset(cu_info, 0, sizeof(*cu_info)); 6225 6226 mutex_lock(&adev->grbm_idx_mutex); 6227 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 6228 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 6229 mask = 1; 6230 ao_bitmap = 0; 6231 counter = 0; 6232 gfx_v8_0_select_se_sh(adev, i, j); 6233 bitmap = gfx_v8_0_get_cu_active_bitmap(adev); 6234 cu_info->bitmap[i][j] = bitmap; 6235 6236 for (k = 0; k < 16; k ++) { 6237 if (bitmap & mask) { 6238 if (counter < 2) 6239 ao_bitmap |= mask; 6240 counter ++; 6241 } 6242 mask <<= 1; 6243 } 6244 active_cu_number += counter; 6245 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 6246 } 6247 } 6248 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); 6249 mutex_unlock(&adev->grbm_idx_mutex); 6250 6251 cu_info->number = active_cu_number; 6252 cu_info->ao_cu_mask = ao_cu_mask; 6253 } 6254