/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)		((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)		((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)		((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)		((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)		((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)		((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)		((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L
/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
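
/*
 * Per-VMID GDS allocation registers: each entry below gives the GDS
 * base/size pair plus the GWS and OA allocation registers for one of the
 * 16 VMIDs.
 */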
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
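
/*
 * The golden register tables below are flat {offset, AND mask, OR value}
 * triplets consumed by amdgpu_program_register_sequence(): each register is
 * read-modify-written with the mask, so an entry with a 0xffffffff mask
 * amounts to a plain write of the value.
 */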
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
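
/*
 * Program the per-ASIC golden sequences defined above; for each chip the
 * clockgating (mgcg_cgcg_init) table, where present, is applied first,
 * then the tuning settings, then the common block.
 */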
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
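
/*
 * Same scratch-register handshake as gfx_v8_0_ring_test_ring() above, but
 * the SET_UCONFIG_REG packet is submitted through an indirect buffer and
 * completion is detected via the fence, so the full IB fetch path is
 * exercised: write 0xCAFEDEAD directly, submit an IB that writes
 * 0xDEADBEEF, wait, then read the scratch register back.
 */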
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
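
/*
 * Fetch and validate the PFP/ME/CE/RLC/MEC microcode for the detected ASIC.
 * With SMU-based loading, each image is also registered in
 * adev->firmware.ucode[] so the SMU can upload it later.
 */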
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version #46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
		le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the CP jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}
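
/*
 * Write the clear-state indirect buffer: PREAMBLE begin/end markers
 * bracketing SET_CONTEXT_REG packets built from the vi_cs_data section
 * tables and the harvested raster configuration, terminated by a
 * CLEAR_STATE packet.
 */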
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
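
/*
 * RLC buffer setup/teardown: the clear-state BO holds the CSB generated by
 * gfx_v8_0_get_csb_buffer(); on Carrizo/Stoney a second BO holds the CP
 * jump table plus 64KB reserved for GDS (per the "JT + GDS" sizing below).
 */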
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
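
/*
 * Raw gfx8 shader machine code for the EDC GPR workaround below.  The two
 * kernels simply touch every VGPR/SGPR bank (what appear to be v_mov_b32 /
 * s_mov_b32 sequences, ending in s_barrier and s_endpgm) so the register
 * files start from a known-clean ECC state.
 */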
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
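
/*
 * EDC SEC/DED error counter registers; these are read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counts.
 */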
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};

static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
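
/*
 * Cache the per-ASIC gfx configuration (shader engine/pipe/CU counts,
 * FIFO sizes) and derive GB_ADDR_CONFIG from the golden value plus the
 * DRAM row size read back from the memory controller.
 */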
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM is installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
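		/*
		 * Row size in KB, computed as (4 * 2^(8 + NOOFCOLS)) /
		 * 1024; e.g. NOOFCOLS == 0 yields 1KB.  Capped at 4KB
		 * below.
		 */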
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
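
/*
 * Set up a single compute ring: ring_id selects the {me, pipe, queue}
 * triple, the doorbell slot and the HPD EOP offset, and the EOP
 * interrupt source is chosen per MEC pipe.
 */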
static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			     + (ring_id * GFX8_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}
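
/*
 * One-time software init: register the KIQ/EOP/privileged-access
 * interrupt sources, load microcode, create the RLC/MEC/KIQ objects
 * and MQDs, initialize the gfx and compute rings, and reserve the
 * GDS, GWS and OA partitions for gfx.  sw_fini below releases the
 * same resources.
 */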
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}

static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	gfx_v8_0_free_microcode(adev);

	return 0;
}
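
/*
 * Program the GB_TILE_MODE and GB_MACROTILE_MODE register tables with
 * per-ASIC golden tiling values.  Entries not assigned for a given
 * ASIC stay zero and, where required, are skipped when the tables are
 * written out.
 */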
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
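
		/*
		 * The tile-mode entries left unset above (7, 12, 17
		 * and 23) and macrotile entry 7 are skipped on
		 * write-out.
		 */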
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2959 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2960 NUM_BANKS(ADDR_SURF_16_BANK)); 2961 2962 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2963 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2964 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2965 NUM_BANKS(ADDR_SURF_16_BANK)); 2966 2967 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2968 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2969 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2970 NUM_BANKS(ADDR_SURF_16_BANK)); 2971 2972 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2973 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2974 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2975 NUM_BANKS(ADDR_SURF_16_BANK)); 2976 2977 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2978 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2979 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2980 NUM_BANKS(ADDR_SURF_16_BANK)); 2981 2982 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2983 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2984 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2985 NUM_BANKS(ADDR_SURF_16_BANK)); 2986 2987 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2988 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2989 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2990 NUM_BANKS(ADDR_SURF_16_BANK)); 2991 2992 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2993 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2994 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2995 NUM_BANKS(ADDR_SURF_16_BANK)); 2996 2997 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2998 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2999 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3000 NUM_BANKS(ADDR_SURF_16_BANK)); 3001 3002 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3003 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3004 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3005 NUM_BANKS(ADDR_SURF_16_BANK)); 3006 3007 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3008 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3009 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3010 NUM_BANKS(ADDR_SURF_16_BANK)); 3011 3012 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3013 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3014 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3015 NUM_BANKS(ADDR_SURF_8_BANK)); 3016 3017 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3018 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3019 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3020 NUM_BANKS(ADDR_SURF_4_BANK)); 3021 3022 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3023 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3024 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3025 NUM_BANKS(ADDR_SURF_4_BANK)); 3026 3027 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3028 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3029 3030 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3031 if (reg_offset != 7) 3032 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3033 3034 break; 3035 case CHIP_STONEY: 3036 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3037 PIPE_CONFIG(ADDR_SURF_P2) | 3038 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3039 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3040 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3041 PIPE_CONFIG(ADDR_SURF_P2) | 3042 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3043 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3044 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3045 PIPE_CONFIG(ADDR_SURF_P2) | 3046 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3047 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3048 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3049 PIPE_CONFIG(ADDR_SURF_P2) | 3050 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3051 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3052 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3053 PIPE_CONFIG(ADDR_SURF_P2) | 3054 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3055 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3056 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3057 PIPE_CONFIG(ADDR_SURF_P2) | 3058 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3059 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3060 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3061 PIPE_CONFIG(ADDR_SURF_P2) | 3062 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3063 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3064 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3065 PIPE_CONFIG(ADDR_SURF_P2)); 3066 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3067 PIPE_CONFIG(ADDR_SURF_P2) | 3068 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3069 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3070 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3071 PIPE_CONFIG(ADDR_SURF_P2) | 3072 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3073 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3074 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3075 PIPE_CONFIG(ADDR_SURF_P2) | 3076 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3077 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3078 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3079 PIPE_CONFIG(ADDR_SURF_P2) | 3080 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3081 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3082 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3083 PIPE_CONFIG(ADDR_SURF_P2) | 3084 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3085 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3086 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3087 PIPE_CONFIG(ADDR_SURF_P2) | 3088 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3089 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3090 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3091 PIPE_CONFIG(ADDR_SURF_P2) | 3092 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3093 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3094 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3095 PIPE_CONFIG(ADDR_SURF_P2) | 3096 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3097 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3098 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3099 PIPE_CONFIG(ADDR_SURF_P2) | 3100 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3101 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3102 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3103 PIPE_CONFIG(ADDR_SURF_P2) | 3104 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3105 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3106 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3107 PIPE_CONFIG(ADDR_SURF_P2) | 3108 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3109 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3110 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3111 PIPE_CONFIG(ADDR_SURF_P2) | 3112 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3113 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3114 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3115 PIPE_CONFIG(ADDR_SURF_P2) | 3116 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3117 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3118 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3119 PIPE_CONFIG(ADDR_SURF_P2) | 3120 
MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3121 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3122 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3123 PIPE_CONFIG(ADDR_SURF_P2) | 3124 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3125 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3126 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3127 PIPE_CONFIG(ADDR_SURF_P2) | 3128 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3129 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3130 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3131 PIPE_CONFIG(ADDR_SURF_P2) | 3132 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3134 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3135 PIPE_CONFIG(ADDR_SURF_P2) | 3136 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3137 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3138 3139 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3140 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3141 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3142 NUM_BANKS(ADDR_SURF_8_BANK)); 3143 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3144 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3145 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3146 NUM_BANKS(ADDR_SURF_8_BANK)); 3147 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3148 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3149 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3150 NUM_BANKS(ADDR_SURF_8_BANK)); 3151 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3152 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3153 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3154 NUM_BANKS(ADDR_SURF_8_BANK)); 3155 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3156 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3157 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3158 NUM_BANKS(ADDR_SURF_8_BANK)); 3159 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3160 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3161 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3162 NUM_BANKS(ADDR_SURF_8_BANK)); 3163 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3164 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3165 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3166 NUM_BANKS(ADDR_SURF_8_BANK)); 3167 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3170 NUM_BANKS(ADDR_SURF_16_BANK)); 3171 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3172 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3173 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3174 NUM_BANKS(ADDR_SURF_16_BANK)); 3175 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3176 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3177 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3178 NUM_BANKS(ADDR_SURF_16_BANK)); 3179 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3182 NUM_BANKS(ADDR_SURF_16_BANK)); 3183 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3184 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3185 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3186 NUM_BANKS(ADDR_SURF_16_BANK)); 3187 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3188 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3189 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3190 NUM_BANKS(ADDR_SURF_16_BANK)); 3191 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3194 NUM_BANKS(ADDR_SURF_8_BANK)); 3195 3196 for (reg_offset = 
0; reg_offset < num_tile_mode_states; reg_offset++) 3197 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3198 reg_offset != 23) 3199 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3200 3201 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3202 if (reg_offset != 7) 3203 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3204 3205 break; 3206 default: 3207 dev_warn(adev->dev, 3208 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3209 adev->asic_type); 3210 3211 case CHIP_CARRIZO: 3212 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3213 PIPE_CONFIG(ADDR_SURF_P2) | 3214 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3215 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3216 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3217 PIPE_CONFIG(ADDR_SURF_P2) | 3218 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3219 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3220 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3221 PIPE_CONFIG(ADDR_SURF_P2) | 3222 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3223 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3224 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3225 PIPE_CONFIG(ADDR_SURF_P2) | 3226 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3227 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3228 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3229 PIPE_CONFIG(ADDR_SURF_P2) | 3230 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3231 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3232 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3233 PIPE_CONFIG(ADDR_SURF_P2) | 3234 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3235 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3236 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3237 PIPE_CONFIG(ADDR_SURF_P2) | 3238 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3239 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3240 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3241 PIPE_CONFIG(ADDR_SURF_P2)); 3242 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3243 PIPE_CONFIG(ADDR_SURF_P2) | 3244 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3246 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3247 PIPE_CONFIG(ADDR_SURF_P2) | 3248 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3249 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3250 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3251 PIPE_CONFIG(ADDR_SURF_P2) | 3252 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3254 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3255 PIPE_CONFIG(ADDR_SURF_P2) | 3256 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3257 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3258 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3259 PIPE_CONFIG(ADDR_SURF_P2) | 3260 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3262 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3263 PIPE_CONFIG(ADDR_SURF_P2) | 3264 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3266 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3267 PIPE_CONFIG(ADDR_SURF_P2) | 3268 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3270 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3271 PIPE_CONFIG(ADDR_SURF_P2) | 3272 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3273 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3274 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3275 PIPE_CONFIG(ADDR_SURF_P2) | 3276 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3278 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3279 PIPE_CONFIG(ADDR_SURF_P2) | 3280 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3282 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3283 PIPE_CONFIG(ADDR_SURF_P2) | 3284 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3286 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3287 PIPE_CONFIG(ADDR_SURF_P2) | 3288 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3290 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3291 PIPE_CONFIG(ADDR_SURF_P2) | 3292 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3294 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3295 PIPE_CONFIG(ADDR_SURF_P2) | 3296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3298 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3299 PIPE_CONFIG(ADDR_SURF_P2) | 3300 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3302 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3303 PIPE_CONFIG(ADDR_SURF_P2) | 3304 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3306 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3307 PIPE_CONFIG(ADDR_SURF_P2) | 3308 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3310 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3311 PIPE_CONFIG(ADDR_SURF_P2) | 3312 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3314 3315 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3316 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3317 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3318 NUM_BANKS(ADDR_SURF_8_BANK)); 3319 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3320 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3321 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3322 NUM_BANKS(ADDR_SURF_8_BANK)); 3323 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3324 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3325 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3326 NUM_BANKS(ADDR_SURF_8_BANK)); 3327 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3328 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3329 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3330 NUM_BANKS(ADDR_SURF_8_BANK)); 3331 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3332 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3333 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3334 NUM_BANKS(ADDR_SURF_8_BANK)); 3335 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3336 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3337 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3338 NUM_BANKS(ADDR_SURF_8_BANK)); 3339 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3342 NUM_BANKS(ADDR_SURF_8_BANK)); 3343 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3346 NUM_BANKS(ADDR_SURF_16_BANK)); 3347 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3348 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3350 NUM_BANKS(ADDR_SURF_16_BANK)); 3351 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3354 NUM_BANKS(ADDR_SURF_16_BANK)); 3355 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3358 NUM_BANKS(ADDR_SURF_16_BANK)); 3359 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3362 NUM_BANKS(ADDR_SURF_16_BANK)); 3363 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3366 NUM_BANKS(ADDR_SURF_16_BANK)); 3367 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3370 NUM_BANKS(ADDR_SURF_8_BANK)); 3371 3372 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3373 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3374 reg_offset != 23) 3375 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3376 3377 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3378 if (reg_offset != 7) 3379 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3380 3381 break; 3382 } 3383 } 3384 3385 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, 3386 u32 se_num, u32 sh_num, u32 instance) 3387 { 3388 u32 data; 3389 3390 if (instance == 0xffffffff) 3391 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3392 else 3393 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 3394 3395 if (se_num == 0xffffffff) 3396 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3397 else 3398 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3399 3400 if (sh_num == 0xffffffff) 3401 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3402 else 3403 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3404 3405 WREG32(mmGRBM_GFX_INDEX, data); 3406 } 3407 3408 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3409 { 3410 u32 data, mask; 3411 3412 data = RREG32(mmCC_RB_BACKEND_DISABLE) | 3413 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3414 3415 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); 3416 3417 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 3418 adev->gfx.config.max_sh_per_se); 3419 3420 return (~data) & mask; 3421 } 3422 3423 static void 3424 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) 3425 { 3426 switch (adev->asic_type) { 3427 case CHIP_FIJI: 3428 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3429 RB_XSEL2(1) | PKR_MAP(2) | 3430 PKR_XSEL(1) | PKR_YSEL(1) | 3431 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); 3432 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | 3433 SE_PAIR_YSEL(2); 3434 break; 3435 case CHIP_TONGA: 3436 case CHIP_POLARIS10: 3437 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3438 SE_XSEL(1) | SE_YSEL(1); 3439 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | 3440 SE_PAIR_YSEL(2); 3441 break; 3442 case CHIP_TOPAZ: 3443 case CHIP_CARRIZO: 3444 *rconf |= RB_MAP_PKR0(2); 3445 *rconf1 |= 0x0; 3446 break; 3447 case CHIP_POLARIS11: 3448 case CHIP_POLARIS12: 3449 *rconf |= RB_MAP_PKR0(2) | 
RB_XSEL2(1) | SE_MAP(2) | 3450 SE_XSEL(1) | SE_YSEL(1); 3451 *rconf1 |= 0x0; 3452 break; 3453 case CHIP_STONEY: 3454 *rconf |= 0x0; 3455 *rconf1 |= 0x0; 3456 break; 3457 default: 3458 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 3459 break; 3460 } 3461 } 3462 3463 static void 3464 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, 3465 u32 raster_config, u32 raster_config_1, 3466 unsigned rb_mask, unsigned num_rb) 3467 { 3468 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); 3469 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); 3470 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); 3471 unsigned rb_per_se = num_rb / num_se; 3472 unsigned se_mask[4]; 3473 unsigned se; 3474 3475 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3476 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3477 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 3478 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3479 3480 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); 3481 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); 3482 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); 3483 3484 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3485 (!se_mask[2] && !se_mask[3]))) { 3486 raster_config_1 &= ~SE_PAIR_MAP_MASK; 3487 3488 if (!se_mask[0] && !se_mask[1]) { 3489 raster_config_1 |= 3490 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); 3491 } else { 3492 raster_config_1 |= 3493 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); 3494 } 3495 } 3496 3497 for (se = 0; se < num_se; se++) { 3498 unsigned raster_config_se = raster_config; 3499 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3500 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3501 int idx = (se / 2) * 2; 3502 3503 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3504 raster_config_se &= ~SE_MAP_MASK; 3505 3506 if (!se_mask[idx]) { 3507 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); 3508 } else { 3509 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); 3510 } 3511 } 3512 3513 pkr0_mask &= rb_mask; 3514 pkr1_mask &= rb_mask; 3515 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3516 raster_config_se &= ~PKR_MAP_MASK; 3517 3518 if (!pkr0_mask) { 3519 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); 3520 } else { 3521 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); 3522 } 3523 } 3524 3525 if (rb_per_se >= 2) { 3526 unsigned rb0_mask = 1 << (se * rb_per_se); 3527 unsigned rb1_mask = rb0_mask << 1; 3528 3529 rb0_mask &= rb_mask; 3530 rb1_mask &= rb_mask; 3531 if (!rb0_mask || !rb1_mask) { 3532 raster_config_se &= ~RB_MAP_PKR0_MASK; 3533 3534 if (!rb0_mask) { 3535 raster_config_se |= 3536 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); 3537 } else { 3538 raster_config_se |= 3539 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); 3540 } 3541 } 3542 3543 if (rb_per_se > 2) { 3544 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3545 rb1_mask = rb0_mask << 1; 3546 rb0_mask &= rb_mask; 3547 rb1_mask &= rb_mask; 3548 if (!rb0_mask || !rb1_mask) { 3549 raster_config_se &= ~RB_MAP_PKR1_MASK; 3550 3551 if (!rb0_mask) { 3552 raster_config_se |= 3553 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); 3554 } else { 3555 raster_config_se |= 3556 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0); 3557 } 3558 } 3559 } 3560 } 3561 3562 /* GRBM_GFX_INDEX has a different offset on VI */ 3563 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); 3564 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); 3565 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3566 } 3567 3568 /* GRBM_GFX_INDEX has a different offset 
on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}

static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
	    adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMID SH_MEM registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
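		/*
		 * Presumably APE1 is meant to stay disabled for the compute
		 * VMIDs: the two writes below program base = 1 and limit = 0,
		 * i.e. base above limit, which leaves the APE1 aperture empty.
		 */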
WREG32(mmSH_MEM_APE1_BASE, 1); 3667 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3668 WREG32(mmSH_MEM_BASES, sh_mem_bases); 3669 } 3670 vi_srbm_select(adev, 0, 0, 0, 0); 3671 mutex_unlock(&adev->srbm_mutex); 3672 } 3673 3674 static void gfx_v8_0_config_init(struct amdgpu_device *adev) 3675 { 3676 switch (adev->asic_type) { 3677 default: 3678 adev->gfx.config.double_offchip_lds_buf = 1; 3679 break; 3680 case CHIP_CARRIZO: 3681 case CHIP_STONEY: 3682 adev->gfx.config.double_offchip_lds_buf = 0; 3683 break; 3684 } 3685 } 3686 3687 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) 3688 { 3689 u32 tmp, sh_static_mem_cfg; 3690 int i; 3691 3692 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF); 3693 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3694 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3695 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); 3696 3697 gfx_v8_0_tiling_mode_table_init(adev); 3698 gfx_v8_0_setup_rb(adev); 3699 gfx_v8_0_get_cu_info(adev); 3700 gfx_v8_0_config_init(adev); 3701 3702 /* XXX SH_MEM regs */ 3703 /* where to put LDS, scratch, GPUVM in FSA64 space */ 3704 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG, 3705 SWIZZLE_ENABLE, 1); 3706 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3707 ELEMENT_SIZE, 1); 3708 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3709 INDEX_STRIDE, 3); 3710 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); 3711 3712 mutex_lock(&adev->srbm_mutex); 3713 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { 3714 vi_srbm_select(adev, 0, 0, 0, i); 3715 /* CP and shaders */ 3716 if (i == 0) { 3717 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC); 3718 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3719 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3720 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3721 WREG32(mmSH_MEM_CONFIG, tmp); 3722 WREG32(mmSH_MEM_BASES, 0); 3723 } else { 3724 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC); 3725 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3726 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3727 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3728 WREG32(mmSH_MEM_CONFIG, tmp); 3729 tmp = adev->mc.shared_aperture_start >> 48; 3730 WREG32(mmSH_MEM_BASES, tmp); 3731 } 3732 3733 WREG32(mmSH_MEM_APE1_BASE, 1); 3734 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3735 } 3736 vi_srbm_select(adev, 0, 0, 0, 0); 3737 mutex_unlock(&adev->srbm_mutex); 3738 3739 gfx_v8_0_init_compute_vmid(adev); 3740 3741 mutex_lock(&adev->grbm_idx_mutex); 3742 /* 3743 * making sure that the following register writes will be broadcasted 3744 * to all the shaders 3745 */ 3746 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3747 3748 WREG32(mmPA_SC_FIFO_SIZE, 3749 (adev->gfx.config.sc_prim_fifo_size_frontend << 3750 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | 3751 (adev->gfx.config.sc_prim_fifo_size_backend << 3752 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | 3753 (adev->gfx.config.sc_hiz_tile_fifo_size << 3754 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | 3755 (adev->gfx.config.sc_earlyz_tile_fifo_size << 3756 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); 3757 3758 tmp = RREG32(mmSPI_ARB_PRIORITY); 3759 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); 3760 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); 3761 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); 3762 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 
			    2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}

static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
	       adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
	       adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
	       adev->gfx.rlc.clear_state_size);
}

static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}
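/*
 * A sketch of the indirect list layout as inferred from the walk above
 * (not from a published format document, so treat the field names as
 * assumptions): the format stream is a sequence of entries, each a run
 * of three-dword records terminated by a 0xFFFFFFFF dword:
 *
 *     { data0, data1, index } { data0, data1, index } ... 0xFFFFFFFF
 *
 * gfx_v8_0_parse_ind_reg_list() records where each entry starts in
 * ind_start_offsets[], deduplicates the index dwords into
 * unique_indices[], and rewrites each index dword in place with its
 * slot number, which is what gets uploaded to the RLC scratch below.
 */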
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
	       adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
	       adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
		       indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}

static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}

static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
}

static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	/* note the inverted field: CP_PG_DISABLE = 0 turns CP power gating on */
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
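/*
 * Illustrative use of the three helpers above, modeled on the
 * power-gating paths elsewhere in this file; the guard flags are
 * assumptions here, not taken from this section:
 *
 *     if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
 *             cz_enable_sck_slow_down_on_power_up(adev, true);
 *             cz_enable_sck_slow_down_on_power_down(adev, true);
 *     } else {
 *             cz_enable_sck_slow_down_on_power_up(adev, false);
 *             cz_enable_sck_slow_down_on_power_down(adev, false);
 *     }
 *     cz_enable_cp_power_gating(adev, !!(adev->pg_flags & AMD_PG_SUPPORT_CP));
 */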
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}
}

static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* on APUs (e.g. Carrizo) the GUI idle interrupt is enabled only after the CP is initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}

static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}

static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp =
RREG32(mmCP_ME_CNTL); 4097 4098 if (enable) { 4099 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0); 4100 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0); 4101 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0); 4102 } else { 4103 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); 4104 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); 4105 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); 4106 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 4107 adev->gfx.gfx_ring[i].ready = false; 4108 } 4109 WREG32(mmCP_ME_CNTL, tmp); 4110 udelay(50); 4111 } 4112 4113 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 4114 { 4115 const struct gfx_firmware_header_v1_0 *pfp_hdr; 4116 const struct gfx_firmware_header_v1_0 *ce_hdr; 4117 const struct gfx_firmware_header_v1_0 *me_hdr; 4118 const __le32 *fw_data; 4119 unsigned i, fw_size; 4120 4121 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 4122 return -EINVAL; 4123 4124 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 4125 adev->gfx.pfp_fw->data; 4126 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 4127 adev->gfx.ce_fw->data; 4128 me_hdr = (const struct gfx_firmware_header_v1_0 *) 4129 adev->gfx.me_fw->data; 4130 4131 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 4132 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 4133 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 4134 4135 gfx_v8_0_cp_gfx_enable(adev, false); 4136 4137 /* PFP */ 4138 fw_data = (const __le32 *) 4139 (adev->gfx.pfp_fw->data + 4140 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 4141 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 4142 WREG32(mmCP_PFP_UCODE_ADDR, 0); 4143 for (i = 0; i < fw_size; i++) 4144 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 4145 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 4146 4147 /* CE */ 4148 fw_data = (const __le32 *) 4149 (adev->gfx.ce_fw->data + 4150 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 4151 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 4152 WREG32(mmCP_CE_UCODE_ADDR, 0); 4153 for (i = 0; i < fw_size; i++) 4154 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 4155 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 4156 4157 /* ME */ 4158 fw_data = (const __le32 *) 4159 (adev->gfx.me_fw->data + 4160 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 4161 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 4162 WREG32(mmCP_ME_RAM_WADDR, 0); 4163 for (i = 0; i < fw_size; i++) 4164 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 4165 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 4166 4167 return 0; 4168 } 4169 4170 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev) 4171 { 4172 u32 count = 0; 4173 const struct cs_section_def *sect = NULL; 4174 const struct cs_extent_def *ext = NULL; 4175 4176 /* begin clear state */ 4177 count += 2; 4178 /* context control state */ 4179 count += 3; 4180 4181 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4182 for (ext = sect->section; ext->extent != NULL; ++ext) { 4183 if (sect->id == SECT_CONTEXT) 4184 count += 2 + ext->reg_count; 4185 else 4186 return 0; 4187 } 4188 } 4189 /* pa_sc_raster_config/pa_sc_raster_config1 */ 4190 count += 4; 4191 /* end clear state */ 4192 count += 2; 4193 /* clear state */ 4194 count += 2; 4195 4196 return count; 4197 } 4198 4199 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) 4200 { 4201 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 4202 const struct cs_section_def *sect = NULL; 4203 const struct cs_extent_def *ext = NULL; 4204 
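	/*
	 * What follows emits the clear-state sequence inline as PM4 packets,
	 * mirroring the dword accounting in gfx_v8_0_get_csb_size() above:
	 * PREAMBLE (begin clear state), CONTEXT_CONTROL, one SET_CONTEXT_REG
	 * run per SECT_CONTEXT extent, the per-ASIC raster config pair,
	 * PREAMBLE (end clear state), CLEAR_STATE, then the CE partition
	 * setup that accounts for the "+ 4" in the ring allocation below.
	 */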
int r, i; 4205 4206 /* init the CP */ 4207 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 4208 WREG32(mmCP_ENDIAN_SWAP, 0); 4209 WREG32(mmCP_DEVICE_ID, 1); 4210 4211 gfx_v8_0_cp_gfx_enable(adev, true); 4212 4213 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); 4214 if (r) { 4215 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 4216 return r; 4217 } 4218 4219 /* clear state buffer */ 4220 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4221 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 4222 4223 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4224 amdgpu_ring_write(ring, 0x80000000); 4225 amdgpu_ring_write(ring, 0x80000000); 4226 4227 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4228 for (ext = sect->section; ext->extent != NULL; ++ext) { 4229 if (sect->id == SECT_CONTEXT) { 4230 amdgpu_ring_write(ring, 4231 PACKET3(PACKET3_SET_CONTEXT_REG, 4232 ext->reg_count)); 4233 amdgpu_ring_write(ring, 4234 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 4235 for (i = 0; i < ext->reg_count; i++) 4236 amdgpu_ring_write(ring, ext->extent[i]); 4237 } 4238 } 4239 } 4240 4241 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 4242 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 4243 switch (adev->asic_type) { 4244 case CHIP_TONGA: 4245 case CHIP_POLARIS10: 4246 amdgpu_ring_write(ring, 0x16000012); 4247 amdgpu_ring_write(ring, 0x0000002A); 4248 break; 4249 case CHIP_POLARIS11: 4250 case CHIP_POLARIS12: 4251 amdgpu_ring_write(ring, 0x16000012); 4252 amdgpu_ring_write(ring, 0x00000000); 4253 break; 4254 case CHIP_FIJI: 4255 amdgpu_ring_write(ring, 0x3a00161a); 4256 amdgpu_ring_write(ring, 0x0000002e); 4257 break; 4258 case CHIP_CARRIZO: 4259 amdgpu_ring_write(ring, 0x00000002); 4260 amdgpu_ring_write(ring, 0x00000000); 4261 break; 4262 case CHIP_TOPAZ: 4263 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ? 
				  0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}

static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			DOORBELL_RANGE_LOWER,
			AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}

static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev,
ring); 4372 /* start the ring */ 4373 amdgpu_ring_clear_ring(ring); 4374 gfx_v8_0_cp_gfx_start(adev); 4375 ring->ready = true; 4376 r = amdgpu_ring_test_ring(ring); 4377 if (r) 4378 ring->ready = false; 4379 4380 return r; 4381 } 4382 4383 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 4384 { 4385 int i; 4386 4387 if (enable) { 4388 WREG32(mmCP_MEC_CNTL, 0); 4389 } else { 4390 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 4391 for (i = 0; i < adev->gfx.num_compute_rings; i++) 4392 adev->gfx.compute_ring[i].ready = false; 4393 adev->gfx.kiq.ring.ready = false; 4394 } 4395 udelay(50); 4396 } 4397 4398 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 4399 { 4400 const struct gfx_firmware_header_v1_0 *mec_hdr; 4401 const __le32 *fw_data; 4402 unsigned i, fw_size; 4403 4404 if (!adev->gfx.mec_fw) 4405 return -EINVAL; 4406 4407 gfx_v8_0_cp_compute_enable(adev, false); 4408 4409 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4410 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4411 4412 fw_data = (const __le32 *) 4413 (adev->gfx.mec_fw->data + 4414 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4415 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4416 4417 /* MEC1 */ 4418 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4419 for (i = 0; i < fw_size; i++) 4420 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4421 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4422 4423 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 4424 if (adev->gfx.mec2_fw) { 4425 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4426 4427 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4428 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4429 4430 fw_data = (const __le32 *) 4431 (adev->gfx.mec2_fw->data + 4432 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4433 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4434 4435 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4436 for (i = 0; i < fw_size; i++) 4437 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4438 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4439 } 4440 4441 return 0; 4442 } 4443 4444 /* KIQ functions */ 4445 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) 4446 { 4447 uint32_t tmp; 4448 struct amdgpu_device *adev = ring->adev; 4449 4450 /* tell RLC which is KIQ queue */ 4451 tmp = RREG32(mmRLC_CP_SCHEDULERS); 4452 tmp &= 0xffffff00; 4453 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4454 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4455 tmp |= 0x80; 4456 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4457 } 4458 4459 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) 4460 { 4461 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 4462 uint32_t scratch, tmp = 0; 4463 uint64_t queue_mask = 0; 4464 int r, i; 4465 4466 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 4467 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 4468 continue; 4469 4470 /* This situation may be hit in the future if a new HW 4471 * generation exposes more than 64 queues. 
If so, the 4472 * definition of queue_mask needs updating */ 4473 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 4474 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 4475 break; 4476 } 4477 4478 queue_mask |= (1ull << i); 4479 } 4480 4481 r = amdgpu_gfx_scratch_get(adev, &scratch); 4482 if (r) { 4483 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 4484 return r; 4485 } 4486 WREG32(scratch, 0xCAFEDEAD); 4487 4488 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); 4489 if (r) { 4490 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 4491 amdgpu_gfx_scratch_free(adev, scratch); 4492 return r; 4493 } 4494 /* set resources */ 4495 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 4496 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ 4497 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 4498 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 4499 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 4500 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 4501 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 4502 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 4503 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4504 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4505 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 4506 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4507 4508 /* map queues */ 4509 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 4510 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 4511 amdgpu_ring_write(kiq_ring, 4512 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); 4513 amdgpu_ring_write(kiq_ring, 4514 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | 4515 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 4516 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 4517 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 
0 : 1)); /* doorbell */ 4518 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 4519 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 4520 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 4521 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4522 } 4523 /* write to scratch for completion */ 4524 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 4525 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 4526 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 4527 amdgpu_ring_commit(kiq_ring); 4528 4529 for (i = 0; i < adev->usec_timeout; i++) { 4530 tmp = RREG32(scratch); 4531 if (tmp == 0xDEADBEEF) 4532 break; 4533 DRM_UDELAY(1); 4534 } 4535 if (i >= adev->usec_timeout) { 4536 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", 4537 scratch, tmp); 4538 r = -EINVAL; 4539 } 4540 amdgpu_gfx_scratch_free(adev, scratch); 4541 4542 return r; 4543 } 4544 4545 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) 4546 { 4547 int i, r = 0; 4548 4549 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 4550 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); 4551 for (i = 0; i < adev->usec_timeout; i++) { 4552 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 4553 break; 4554 udelay(1); 4555 } 4556 if (i == adev->usec_timeout) 4557 r = -ETIMEDOUT; 4558 } 4559 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 4560 WREG32(mmCP_HQD_PQ_RPTR, 0); 4561 WREG32(mmCP_HQD_PQ_WPTR, 0); 4562 4563 return r; 4564 } 4565 4566 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) 4567 { 4568 struct amdgpu_device *adev = ring->adev; 4569 struct vi_mqd *mqd = ring->mqd_ptr; 4570 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4571 uint32_t tmp; 4572 4573 mqd->header = 0xC0310800; 4574 mqd->compute_pipelinestat_enable = 0x00000001; 4575 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4576 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4577 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4578 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4579 mqd->compute_misc_reserved = 0x00000003; 4580 if (!(adev->flags & AMD_IS_APU)) { 4581 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr 4582 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4583 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr 4584 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4585 } 4586 eop_base_addr = ring->eop_gpu_addr >> 8; 4587 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4588 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4589 4590 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4591 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4592 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4593 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); 4594 4595 mqd->cp_hqd_eop_control = tmp; 4596 4597 /* enable doorbell? */ 4598 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), 4599 CP_HQD_PQ_DOORBELL_CONTROL, 4600 DOORBELL_EN, 4601 ring->use_doorbell ? 

static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	if (!(adev->flags & AMD_IS_APU)) {
		mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
				+ offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
		mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
				+ offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	}
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords.
	 * E.g. with GFX8_MEC_HPD_SIZE = 2048 bytes = 512 dwords,
	 * order_base_2(512) - 1 = 8, and 2^(8+1) = 512 dwords again.
	 */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;
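
	/* The block below snapshots the current hardware values of these HQD
	 * registers into the MQD, so that gfx_v8_0_mqd_commit() can later
	 * write them back verbatim when the queue is (re)programmed.
	 */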

	/* defaults */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}

int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}

static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}

static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}

static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}

static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
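
/* Roughly, the compute bring-up above is: enable the MEC, program the KIQ's
 * own MQD directly via SRBM, initialize each KCQ's MQD, let the KIQ map every
 * KCQ (gfx_v8_0_kiq_kcq_enable), then ring-test the KIQ and each KCQ to
 * confirm the queues are actually live.
 */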

static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}

static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}

static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}

static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	amdgpu_set_powergating_state(adev,
				     AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}

static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.in_suspend = true;
	return gfx_v8_0_hw_fini(adev);
}

static int gfx_v8_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gfx_v8_0_hw_init(adev);
	adev->gfx.in_suspend = false;
	return r;
}

static bool gfx_v8_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

static int gfx_v8_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_is_idle(handle))
			return 0;

		udelay(1);
	}
	return -ETIMEDOUT;
}
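
/* The four functions below form the GFX soft-reset pipeline:
 * check_soft_reset() inspects the GRBM/SRBM status registers and records
 * which reset bits are needed, pre_soft_reset() quiesces the CP and
 * deactivates the compute HQDs, soft_reset() pulses the recorded reset
 * bits, and post_soft_reset() brings the rings and the RLC back up.
 */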

static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}

static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}

static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}

static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}

/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}

static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}
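
/* Both helpers above go through the SQ's indirect register interface:
 * SQ_IND_INDEX selects a (simd, wave, thread, register) tuple and
 * SQ_IND_DATA returns the value. With AUTO_INCR set, repeated reads of
 * SQ_IND_DATA walk consecutive registers, which is how a whole block of
 * SGPRs is dumped in one shot by gfx_v8_0_read_wave_sgprs() below.
 */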

static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};

static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}

static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
				     AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}

static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12))
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							 bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}

static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}

static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}

	return 0;
}
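
/* Note: gfx_v8_0_get_clockgating_state() below does not report cached
 * driver flags; it reads the CG-related registers directly, so the flags
 * it returns reflect what the hardware is actually doing at call time.
 */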

static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}

static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}

#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e

static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}

static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};

static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to set CGCG override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg; cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * === MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * === CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}

static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
						  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_CG,
				      pp_support_state,
				      pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_MG,
				      pp_support_state,
				      pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}
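
/* Worked example (a reading of the calls above, not new behaviour): with
 * both CGCG and CGLS supported and state == AMD_CG_STATE_GATE, the request
 * sent to the SMU is PP_CG_MSG_ID(PP_GROUP_GFX, PP_BLOCK_GFX_CG,
 * PP_STATE_SUPPORT_LS | PP_STATE_SUPPORT_CG, PP_STATE_LS | PP_STATE_CG),
 * i.e. "GFX coarse clock gating and light sleep: both supported, both on".
 */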

static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
						    enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_CG,
				      pp_support_state,
				      pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_3D,
				      pp_support_state,
				      pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_MG,
				      pp_support_state,
				      pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_RLC,
				      pp_support_state,
				      pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_CP,
				      pp_support_state,
				      pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}

static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}

static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}

static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		/* XXX check if swapping is necessary on BE */
		return ring->adev->wb.wb[ring->wptr_offs];
	else
		return RREG32(mmCP_RB0_WPTR);
}

static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
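
/* The gfx ring write pointer above takes one of two paths: with
 * use_doorbell set, the new wptr is mirrored into the writeback buffer and
 * then posted through the doorbell aperture; otherwise it is written to
 * mmCP_RB0_WPTR directly, with a read-back to make sure the write has
 * landed before continuing.
 */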

static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}

static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}

static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);
}

static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}
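
/* Compute rings are always doorbell-backed here: the helper below mirrors
 * the wptr into the writeback slot (the same address programmed as
 * cp_hqd_pq_wptr_poll_addr in gfx_v8_0_mqd_init()) and then rings the
 * queue's doorbell so the CP picks up the new work.
 */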
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time a preamble is
		 * presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if a preamble is presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still set load_ce_ram the first time a preamble is
		 * presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |		/* src: register */
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

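/*
 * Each MEC pipe has its own CP_ME1_PIPEn_INT_CNTL register, so compute
 * EOP (timestamp) interrupts are controlled at pipe granularity: pick
 * the register for the requested pipe, then set or clear its
 * TIME_STAMP_INT_ENABLE bit.
 */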
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

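/*
 * The IH ring_id encodes the interrupt source queue: bits [3:2] are the
 * ME, bits [1:0] the pipe, and bits [6:4] the queue.  For example,
 * ring_id 0x25 decodes to me 1, pipe 1, queue 2.
 */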
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only supports GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

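/*
 * emit_frame_size is the worst-case dword budget for the fixed packets a
 * single submission can emit on this ring; together with emit_ib_size it
 * is used when reserving ring space for a submission.  The per-packet
 * counts are itemized below so the total stays auditable.
 */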
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if we count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
			the first COND_EXEC jumps to the place just
			prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};

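/*
 * The KIQ (kernel interface queue) reuses the compute ring helpers but
 * has its own fence path (gfx_v8_0_ring_emit_fence_kiq) and exposes
 * emit_rreg/emit_wreg so register accesses can be routed through the
 * ring, e.g. under SR-IOV where direct MMIO access is not always
 * available to the guest.
 */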
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

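/*
 * Walk every shader engine / shader array pair, record which CUs are
 * active in each, and build both the per-array bitmaps and the
 * accumulated always-on (AO) CU mask reported in adev->gfx.cu_info.
 */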
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	static union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}

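/*
 * The DE metadata mirrors the CE payload above: the draw-engine state
 * snapshot is written into the reserved client state area (csa_addr),
 * with the GDS backup address recorded alongside it; like the CE
 * payload, the struct layout depends on chained IB support.
 */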
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	static union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}