/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)		((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)		((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)		((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)		((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)		((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)		((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)		((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
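/* GDS register offsets (base, size, GWS, OA) for each of the 16 VMIDs */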
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

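/*
 * The golden register tables above are consumed as {offset, mask, value}
 * triples by amdgpu_device_program_register_sequence(), which performs a
 * read-modify-write of each listed register.
 */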
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

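/*
 * Basic ring aliveness test: write a token to a scratch register through
 * the ring and poll until the value lands or the timeout expires.
 */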
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

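/*
 * Fetch and validate the PFP/ME/CE/RLC/MEC microcode images for the
 * detected ASIC. Polaris parts prefer the "_2.bin" images and fall back
 * to the original names when those are not present.
 */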
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version #46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
		le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

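	/*
	 * When the SMU handles the microcode load, register every image in
	 * the ucode table and account for its size so it can be fetched.
	 */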
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the MEC jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

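/*
 * Build the RLC clear-state indirect buffer contents from the per-ASIC
 * cs_data section tables.
 */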
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

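/*
 * Allocate and fill the RLC clear-state buffer and, on Carrizo/Stoney,
 * the CP jump table used for power gating.
 */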
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

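/*
 * Raw GCN machine code for the VGPR/SGPR initialization shaders used by
 * the EDC GPR workaround below.
 */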
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};

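/*
 * Carrizo EDC workaround: temporarily clear GB_EDC_MODE and run compute
 * dispatches that write every VGPR and SGPR so the GPRs start from a
 * known state.
 */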
RREG32(mmGB_EDC_MODE); 1567 WREG32(mmGB_EDC_MODE, 0); 1568 1569 total_size = 1570 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1571 total_size += 1572 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1573 total_size += 1574 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1575 total_size = ALIGN(total_size, 256); 1576 vgpr_offset = total_size; 1577 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 1578 sgpr_offset = total_size; 1579 total_size += sizeof(sgpr_init_compute_shader); 1580 1581 /* allocate an indirect buffer to put the commands in */ 1582 memset(&ib, 0, sizeof(ib)); 1583 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 1584 if (r) { 1585 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 1586 return r; 1587 } 1588 1589 /* load the compute shaders */ 1590 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 1591 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 1592 1593 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 1594 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 1595 1596 /* init the ib length to 0 */ 1597 ib.length_dw = 0; 1598 1599 /* VGPR */ 1600 /* write the register state for the compute dispatch */ 1601 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) { 1602 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1603 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START; 1604 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1]; 1605 } 1606 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1607 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 1608 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1609 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1610 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1611 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1612 1613 /* write dispatch packet */ 1614 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1615 ib.ptr[ib.length_dw++] = 8; /* x */ 1616 ib.ptr[ib.length_dw++] = 1; /* y */ 1617 ib.ptr[ib.length_dw++] = 1; /* z */ 1618 ib.ptr[ib.length_dw++] = 1619 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1620 1621 /* write CS partial flush packet */ 1622 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1623 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1624 1625 /* SGPR1 */ 1626 /* write the register state for the compute dispatch */ 1627 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) { 1628 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1629 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START; 1630 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1]; 1631 } 1632 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1633 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1634 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1635 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1636 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1637 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1638 1639 /* write dispatch packet */ 1640 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1641 ib.ptr[ib.length_dw++] = 8; /* x */ 1642 ib.ptr[ib.length_dw++] = 1; /* y */ 1643 ib.ptr[ib.length_dw++] = 1; /* z */ 1644 ib.ptr[ib.length_dw++] = 1645 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1646 1647 /* write CS partial flush packet */ 1648 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1649 ib.ptr[ib.length_dw++] = 
EVENT_TYPE(7) | EVENT_INDEX(4); 1650 1651 /* SGPR2 */ 1652 /* write the register state for the compute dispatch */ 1653 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) { 1654 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1655 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START; 1656 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1]; 1657 } 1658 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1659 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1660 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1661 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1662 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1663 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1664 1665 /* write dispatch packet */ 1666 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1667 ib.ptr[ib.length_dw++] = 8; /* x */ 1668 ib.ptr[ib.length_dw++] = 1; /* y */ 1669 ib.ptr[ib.length_dw++] = 1; /* z */ 1670 ib.ptr[ib.length_dw++] = 1671 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1672 1673 /* write CS partial flush packet */ 1674 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1675 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1676 1677 /* schedule the ib on the ring */ 1678 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 1679 if (r) { 1680 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 1681 goto fail; 1682 } 1683 1684 /* wait for the GPU to finish processing the IB */ 1685 r = dma_fence_wait(f, false); 1686 if (r) { 1687 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 1688 goto fail; 1689 } 1690 1691 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2); 1692 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1); 1693 WREG32(mmGB_EDC_MODE, tmp); 1694 1695 tmp = RREG32(mmCC_GC_EDC_CONFIG); 1696 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1; 1697 WREG32(mmCC_GC_EDC_CONFIG, tmp); 1698 1699 1700 /* read back registers to clear the counters */ 1701 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) 1702 RREG32(sec_ded_counter_registers[i]); 1703 1704 fail: 1705 amdgpu_ib_free(adev, &ib, NULL); 1706 dma_fence_put(f); 1707 1708 return r; 1709 } 1710 1711 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) 1712 { 1713 u32 gb_addr_config; 1714 u32 mc_shared_chmap, mc_arb_ramcfg; 1715 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; 1716 u32 tmp; 1717 int ret; 1718 1719 switch (adev->asic_type) { 1720 case CHIP_TOPAZ: 1721 adev->gfx.config.max_shader_engines = 1; 1722 adev->gfx.config.max_tile_pipes = 2; 1723 adev->gfx.config.max_cu_per_sh = 6; 1724 adev->gfx.config.max_sh_per_se = 1; 1725 adev->gfx.config.max_backends_per_se = 2; 1726 adev->gfx.config.max_texture_channel_caches = 2; 1727 adev->gfx.config.max_gprs = 256; 1728 adev->gfx.config.max_gs_threads = 32; 1729 adev->gfx.config.max_hw_contexts = 8; 1730 1731 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1732 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1733 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1734 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1735 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN; 1736 break; 1737 case CHIP_FIJI: 1738 adev->gfx.config.max_shader_engines = 4; 1739 adev->gfx.config.max_tile_pipes = 16; 1740 adev->gfx.config.max_cu_per_sh = 16; 1741 adev->gfx.config.max_sh_per_se = 1; 1742 adev->gfx.config.max_backends_per_se = 4; 1743 adev->gfx.config.max_texture_channel_caches = 16; 1744 adev->gfx.config.max_gprs = 256; 1745
adev->gfx.config.max_gs_threads = 32; 1746 adev->gfx.config.max_hw_contexts = 8; 1747 1748 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1749 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1750 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1751 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1752 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1753 break; 1754 case CHIP_POLARIS11: 1755 case CHIP_POLARIS12: 1756 ret = amdgpu_atombios_get_gfx_info(adev); 1757 if (ret) 1758 return ret; 1759 adev->gfx.config.max_gprs = 256; 1760 adev->gfx.config.max_gs_threads = 32; 1761 adev->gfx.config.max_hw_contexts = 8; 1762 1763 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1764 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1765 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1766 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1767 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; 1768 break; 1769 case CHIP_POLARIS10: 1770 ret = amdgpu_atombios_get_gfx_info(adev); 1771 if (ret) 1772 return ret; 1773 adev->gfx.config.max_gprs = 256; 1774 adev->gfx.config.max_gs_threads = 32; 1775 adev->gfx.config.max_hw_contexts = 8; 1776 1777 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1778 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1779 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1780 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1781 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1782 break; 1783 case CHIP_TONGA: 1784 adev->gfx.config.max_shader_engines = 4; 1785 adev->gfx.config.max_tile_pipes = 8; 1786 adev->gfx.config.max_cu_per_sh = 8; 1787 adev->gfx.config.max_sh_per_se = 1; 1788 adev->gfx.config.max_backends_per_se = 2; 1789 adev->gfx.config.max_texture_channel_caches = 8; 1790 adev->gfx.config.max_gprs = 256; 1791 adev->gfx.config.max_gs_threads = 32; 1792 adev->gfx.config.max_hw_contexts = 8; 1793 1794 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1795 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1796 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1797 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1798 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1799 break; 1800 case CHIP_CARRIZO: 1801 adev->gfx.config.max_shader_engines = 1; 1802 adev->gfx.config.max_tile_pipes = 2; 1803 adev->gfx.config.max_sh_per_se = 1; 1804 adev->gfx.config.max_backends_per_se = 2; 1805 adev->gfx.config.max_cu_per_sh = 8; 1806 adev->gfx.config.max_texture_channel_caches = 2; 1807 adev->gfx.config.max_gprs = 256; 1808 adev->gfx.config.max_gs_threads = 32; 1809 adev->gfx.config.max_hw_contexts = 8; 1810 1811 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1812 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1813 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1814 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1815 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1816 break; 1817 case CHIP_STONEY: 1818 adev->gfx.config.max_shader_engines = 1; 1819 adev->gfx.config.max_tile_pipes = 2; 1820 adev->gfx.config.max_sh_per_se = 1; 1821 adev->gfx.config.max_backends_per_se = 1; 1822 adev->gfx.config.max_cu_per_sh = 3; 1823 adev->gfx.config.max_texture_channel_caches = 2; 1824 adev->gfx.config.max_gprs = 256; 1825 adev->gfx.config.max_gs_threads = 16; 1826 adev->gfx.config.max_hw_contexts = 8; 1827 1828 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1829 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1830 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1831 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1832 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 
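/* note: Stoney reuses the Carrizo golden GB_ADDR_CONFIG here, presumably because both are two-pipe APU designs with the same address configuration needs */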
1833 break; 1834 default: 1835 adev->gfx.config.max_shader_engines = 2; 1836 adev->gfx.config.max_tile_pipes = 4; 1837 adev->gfx.config.max_cu_per_sh = 2; 1838 adev->gfx.config.max_sh_per_se = 1; 1839 adev->gfx.config.max_backends_per_se = 2; 1840 adev->gfx.config.max_texture_channel_caches = 4; 1841 adev->gfx.config.max_gprs = 256; 1842 adev->gfx.config.max_gs_threads = 32; 1843 adev->gfx.config.max_hw_contexts = 8; 1844 1845 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1846 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1847 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1848 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1849 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1850 break; 1851 } 1852 1853 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); 1854 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); 1855 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; 1856 1857 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; 1858 adev->gfx.config.mem_max_burst_length_bytes = 256; 1859 if (adev->flags & AMD_IS_APU) { 1860 /* Get memory bank mapping mode. */ 1861 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING); 1862 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1863 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1864 1865 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING); 1866 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1867 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1868 1869 /* Validate settings in case only one DIMM installed. */ 1870 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12)) 1871 dimm00_addr_map = 0; 1872 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12)) 1873 dimm01_addr_map = 0; 1874 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12)) 1875 dimm10_addr_map = 0; 1876 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12)) 1877 dimm11_addr_map = 0; 1878 1879 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */ 1880 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one.
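An addr map field value of 11 corresponds to the 8GB mapping, hence the 2KB row size selected below.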
*/ 1881 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) 1882 adev->gfx.config.mem_row_size_in_kb = 2; 1883 else 1884 adev->gfx.config.mem_row_size_in_kb = 1; 1885 } else { 1886 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS); 1887 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 1888 if (adev->gfx.config.mem_row_size_in_kb > 4) 1889 adev->gfx.config.mem_row_size_in_kb = 4; 1890 } 1891 1892 adev->gfx.config.shader_engine_tile_size = 32; 1893 adev->gfx.config.num_gpus = 1; 1894 adev->gfx.config.multi_gpu_tile_size = 64; 1895 1896 /* fix up row size */ 1897 switch (adev->gfx.config.mem_row_size_in_kb) { 1898 case 1: 1899 default: 1900 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0); 1901 break; 1902 case 2: 1903 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1); 1904 break; 1905 case 4: 1906 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2); 1907 break; 1908 } 1909 adev->gfx.config.gb_addr_config = gb_addr_config; 1910 1911 return 0; 1912 } 1913 1914 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1915 int mec, int pipe, int queue) 1916 { 1917 int r; 1918 unsigned irq_type; 1919 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 1920 1921 1922 1923 /* mec0 is me1 */ 1924 ring->me = mec + 1; 1925 ring->pipe = pipe; 1926 ring->queue = queue; 1927 1928 ring->ring_obj = NULL; 1929 ring->use_doorbell = true; 1930 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; 1931 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1932 + (ring_id * GFX8_MEC_HPD_SIZE); 1933 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1934 1935 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1936 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1937 + ring->pipe; 1938 1939 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1940 r = amdgpu_ring_init(adev, ring, 1024, 1941 &adev->gfx.eop_irq, irq_type); 1942 if (r) 1943 return r; 1944 1945 1946 return 0; 1947 } 1948 1949 static int gfx_v8_0_sw_init(void *handle) 1950 { 1951 int i, j, k, r, ring_id; 1952 struct amdgpu_ring *ring; 1953 struct amdgpu_kiq *kiq; 1954 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1955 1956 switch (adev->asic_type) { 1957 case CHIP_FIJI: 1958 case CHIP_TONGA: 1959 case CHIP_POLARIS11: 1960 case CHIP_POLARIS12: 1961 case CHIP_POLARIS10: 1962 case CHIP_CARRIZO: 1963 adev->gfx.mec.num_mec = 2; 1964 break; 1965 case CHIP_TOPAZ: 1966 case CHIP_STONEY: 1967 default: 1968 adev->gfx.mec.num_mec = 1; 1969 break; 1970 } 1971 1972 adev->gfx.mec.num_pipe_per_mec = 4; 1973 adev->gfx.mec.num_queue_per_pipe = 8; 1974 1975 /* KIQ event */ 1976 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq); 1977 if (r) 1978 return r; 1979 1980 /* EOP Event */ 1981 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); 1982 if (r) 1983 return r; 1984 1985 /* Privileged reg */ 1986 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, 1987 &adev->gfx.priv_reg_irq); 1988 if (r) 1989 return r; 1990 1991 /* Privileged inst */ 1992 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, 1993 &adev->gfx.priv_inst_irq); 1994 if (r) 1995 return r; 1996 1997 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1998 1999 gfx_v8_0_scratch_init(adev); 2000 2001 r = gfx_v8_0_init_microcode(adev); 2002 if (r) {
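/* the GFX block cannot be brought up without its microcode, so fail sw_init early */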
2003 DRM_ERROR("Failed to load gfx firmware!\n"); 2004 return r; 2005 } 2006 2007 r = gfx_v8_0_rlc_init(adev); 2008 if (r) { 2009 DRM_ERROR("Failed to init rlc BOs!\n"); 2010 return r; 2011 } 2012 2013 r = gfx_v8_0_mec_init(adev); 2014 if (r) { 2015 DRM_ERROR("Failed to init MEC BOs!\n"); 2016 return r; 2017 } 2018 2019 /* set up the gfx ring */ 2020 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2021 ring = &adev->gfx.gfx_ring[i]; 2022 ring->ring_obj = NULL; 2023 sprintf(ring->name, "gfx"); 2024 /* no gfx doorbells on iceland */ 2025 if (adev->asic_type != CHIP_TOPAZ) { 2026 ring->use_doorbell = true; 2027 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; 2028 } 2029 2030 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2031 AMDGPU_CP_IRQ_GFX_EOP); 2032 if (r) 2033 return r; 2034 } 2035 2036 2037 /* set up the compute queues - allocate horizontally across pipes */ 2038 ring_id = 0; 2039 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2040 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2041 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2042 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2043 continue; 2044 2045 r = gfx_v8_0_compute_ring_init(adev, 2046 ring_id, 2047 i, k, j); 2048 if (r) 2049 return r; 2050 2051 ring_id++; 2052 } 2053 } 2054 } 2055 2056 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE); 2057 if (r) { 2058 DRM_ERROR("Failed to init KIQ BOs!\n"); 2059 return r; 2060 } 2061 2062 kiq = &adev->gfx.kiq; 2063 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2064 if (r) 2065 return r; 2066 2067 /* create MQD for all compute queues as well as KIQ for SRIOV case */ 2068 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); 2069 if (r) 2070 return r; 2071 2072 /* reserve GDS, GWS and OA resource for gfx */ 2073 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, 2074 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, 2075 &adev->gds.gds_gfx_bo, NULL, NULL); 2076 if (r) 2077 return r; 2078 2079 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, 2080 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, 2081 &adev->gds.gws_gfx_bo, NULL, NULL); 2082 if (r) 2083 return r; 2084 2085 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, 2086 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, 2087 &adev->gds.oa_gfx_bo, NULL, NULL); 2088 if (r) 2089 return r; 2090 2091 adev->gfx.ce_ram_size = 0x8000; 2092 2093 r = gfx_v8_0_gpu_early_init(adev); 2094 if (r) 2095 return r; 2096 2097 return 0; 2098 } 2099 2100 static int gfx_v8_0_sw_fini(void *handle) 2101 { 2102 int i; 2103 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2104 2105 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); 2106 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); 2107 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); 2108 2109 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2110 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2111 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2112 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2113 2114 amdgpu_gfx_compute_mqd_sw_fini(adev); 2115 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 2116 amdgpu_gfx_kiq_fini(adev); 2117 2118 gfx_v8_0_mec_fini(adev); 2119 gfx_v8_0_rlc_fini(adev); 2120 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 2121 &adev->gfx.rlc.clear_state_gpu_addr, 2122 (void **)&adev->gfx.rlc.cs_ptr); 2123 if ((adev->asic_type == CHIP_CARRIZO) || 2124 (adev->asic_type == CHIP_STONEY)) { 2125 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2126 
&adev->gfx.rlc.cp_table_gpu_addr, 2127 (void **)&adev->gfx.rlc.cp_table_ptr); 2128 } 2129 gfx_v8_0_free_microcode(adev); 2130 2131 return 0; 2132 } 2133 2134 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) 2135 { 2136 uint32_t *modearray, *mod2array; 2137 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); 2138 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 2139 u32 reg_offset; 2140 2141 modearray = adev->gfx.config.tile_mode_array; 2142 mod2array = adev->gfx.config.macrotile_mode_array; 2143 2144 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2145 modearray[reg_offset] = 0; 2146 2147 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2148 mod2array[reg_offset] = 0; 2149 2150 switch (adev->asic_type) { 2151 case CHIP_TOPAZ: 2152 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2153 PIPE_CONFIG(ADDR_SURF_P2) | 2154 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2155 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2156 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2157 PIPE_CONFIG(ADDR_SURF_P2) | 2158 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2159 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2160 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2161 PIPE_CONFIG(ADDR_SURF_P2) | 2162 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2163 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2164 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2165 PIPE_CONFIG(ADDR_SURF_P2) | 2166 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2167 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2168 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2169 PIPE_CONFIG(ADDR_SURF_P2) | 2170 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2171 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2172 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2173 PIPE_CONFIG(ADDR_SURF_P2) | 2174 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2175 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2176 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2177 PIPE_CONFIG(ADDR_SURF_P2) | 2178 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2179 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2180 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2181 PIPE_CONFIG(ADDR_SURF_P2)); 2182 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2183 PIPE_CONFIG(ADDR_SURF_P2) | 2184 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2185 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2186 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2187 PIPE_CONFIG(ADDR_SURF_P2) | 2188 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2189 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2190 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2191 PIPE_CONFIG(ADDR_SURF_P2) | 2192 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2193 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2194 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2195 PIPE_CONFIG(ADDR_SURF_P2) | 2196 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2197 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2198 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2199 PIPE_CONFIG(ADDR_SURF_P2) | 2200 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2201 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2202 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2203 PIPE_CONFIG(ADDR_SURF_P2) | 2204 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2205 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2206 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2207 
PIPE_CONFIG(ADDR_SURF_P2) | 2208 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2209 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2210 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2211 PIPE_CONFIG(ADDR_SURF_P2) | 2212 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2213 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2214 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2215 PIPE_CONFIG(ADDR_SURF_P2) | 2216 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2217 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2218 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2219 PIPE_CONFIG(ADDR_SURF_P2) | 2220 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2221 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2222 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2223 PIPE_CONFIG(ADDR_SURF_P2) | 2224 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2225 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2226 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2227 PIPE_CONFIG(ADDR_SURF_P2) | 2228 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2229 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2230 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2231 PIPE_CONFIG(ADDR_SURF_P2) | 2232 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2233 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2234 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2235 PIPE_CONFIG(ADDR_SURF_P2) | 2236 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2237 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2238 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2239 PIPE_CONFIG(ADDR_SURF_P2) | 2240 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2241 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2242 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2243 PIPE_CONFIG(ADDR_SURF_P2) | 2244 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2246 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2247 PIPE_CONFIG(ADDR_SURF_P2) | 2248 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2249 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2250 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2251 PIPE_CONFIG(ADDR_SURF_P2) | 2252 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2254 2255 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2256 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2257 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2258 NUM_BANKS(ADDR_SURF_8_BANK)); 2259 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2260 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2261 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2262 NUM_BANKS(ADDR_SURF_8_BANK)); 2263 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2264 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2265 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2266 NUM_BANKS(ADDR_SURF_8_BANK)); 2267 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2268 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2269 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2270 NUM_BANKS(ADDR_SURF_8_BANK)); 2271 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2272 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2273 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2274 NUM_BANKS(ADDR_SURF_8_BANK)); 2275 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2276 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2277 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2278 NUM_BANKS(ADDR_SURF_8_BANK)); 2279 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2280 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2281 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2282 
NUM_BANKS(ADDR_SURF_8_BANK)); 2283 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2284 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2285 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2286 NUM_BANKS(ADDR_SURF_16_BANK)); 2287 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2288 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2289 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2290 NUM_BANKS(ADDR_SURF_16_BANK)); 2291 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2294 NUM_BANKS(ADDR_SURF_16_BANK)); 2295 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2296 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2297 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2298 NUM_BANKS(ADDR_SURF_16_BANK)); 2299 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2300 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2301 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2302 NUM_BANKS(ADDR_SURF_16_BANK)); 2303 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2306 NUM_BANKS(ADDR_SURF_16_BANK)); 2307 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2308 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2309 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2310 NUM_BANKS(ADDR_SURF_8_BANK)); 2311 2312 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2313 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2314 reg_offset != 23) 2315 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2316 2317 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2318 if (reg_offset != 7) 2319 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2320 2321 break; 2322 case CHIP_FIJI: 2323 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2324 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2325 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2326 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2327 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2328 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2329 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2330 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2331 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2332 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2333 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2334 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2335 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2336 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2337 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2338 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2339 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2340 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2341 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2342 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2343 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2344 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2345 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2346 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2347 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2348 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2349 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2350 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2351 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2352 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2353 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2354 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2355 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2356 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2357 
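/* entries 9-12 below are the display micro tiling modes; each index N is programmed into GB_TILE_MODE<N> by the write loop at the end of this case */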
modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2358 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2359 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2361 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2362 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2363 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2365 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2366 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2367 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2369 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2370 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2371 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2373 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2374 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2375 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2376 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2377 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2378 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2379 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2380 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2381 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2382 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2383 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2385 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2386 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2387 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2388 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2389 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2390 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2391 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2392 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2393 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2394 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2395 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2396 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2397 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2398 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2399 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2400 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2401 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2402 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2403 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2404 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2405 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2406 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2407 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2408 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2409 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2410 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2411 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2412 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2413 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2414 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2415 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2416 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2417 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2418 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2419 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2420 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2421 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2422 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2423 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2424 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2425 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2426 
PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2427 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2428 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2429 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2430 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2431 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2432 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2433 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2434 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2435 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2437 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2438 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2439 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2440 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2441 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2442 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2443 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2444 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2445 2446 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2449 NUM_BANKS(ADDR_SURF_8_BANK)); 2450 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2451 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2452 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2453 NUM_BANKS(ADDR_SURF_8_BANK)); 2454 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2455 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2456 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2457 NUM_BANKS(ADDR_SURF_8_BANK)); 2458 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2459 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2460 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2461 NUM_BANKS(ADDR_SURF_8_BANK)); 2462 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2463 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2464 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2465 NUM_BANKS(ADDR_SURF_8_BANK)); 2466 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2467 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2468 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2469 NUM_BANKS(ADDR_SURF_8_BANK)); 2470 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2471 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2472 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2473 NUM_BANKS(ADDR_SURF_8_BANK)); 2474 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2475 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2476 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2477 NUM_BANKS(ADDR_SURF_8_BANK)); 2478 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2481 NUM_BANKS(ADDR_SURF_8_BANK)); 2482 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2483 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2484 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2485 NUM_BANKS(ADDR_SURF_8_BANK)); 2486 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2487 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2488 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2489 NUM_BANKS(ADDR_SURF_8_BANK)); 2490 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2493 NUM_BANKS(ADDR_SURF_8_BANK)); 2494 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2495 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2496 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2497 NUM_BANKS(ADDR_SURF_8_BANK)); 2498 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2499 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2500 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2501 NUM_BANKS(ADDR_SURF_4_BANK)); 2502 2503 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2504 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2505 2506 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2507 if (reg_offset != 7) 2508 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2509 2510 break; 2511 case CHIP_TONGA: 2512 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2513 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2514 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2515 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2516 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2517 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2518 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2519 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2520 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2521 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2522 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2523 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2524 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2525 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2526 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2527 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2528 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2529 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2530 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2531 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2532 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2533 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2534 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2535 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2536 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2537 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2538 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2539 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2540 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2541 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2542 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2543 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2544 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2545 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2546 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2547 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2548 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2549 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2550 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2551 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2552 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2553 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2554 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2555 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2556 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2557 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2558 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2559 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2560 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2561 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2562 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2563 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2564 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2565 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2566 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2567 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2568 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2569 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2570 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2571 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2572 
MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2573 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2574 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2575 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2576 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2577 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2578 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2579 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2580 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2581 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2582 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2583 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2584 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2585 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2586 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2587 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2588 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2589 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2590 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2591 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2592 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2593 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2594 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2595 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2596 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2597 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2598 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2599 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2600 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2601 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2602 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2603 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2604 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2605 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2606 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2607 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2608 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2609 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2610 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2611 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2612 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2613 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2614 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2615 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2616 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2617 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2618 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2619 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2620 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2621 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2622 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2623 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2624 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2625 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2626 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2627 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2628 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2629 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2630 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2631 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2632 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2633 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2634 2635 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2636 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2637 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2638 NUM_BANKS(ADDR_SURF_16_BANK)); 2639 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2640 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2641 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2642 NUM_BANKS(ADDR_SURF_16_BANK)); 2643 
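/* each mod2array entry packs bank width/height, macro tile aspect and bank count; index N is programmed into GB_MACROTILE_MODE<N> by the write loop below, which skips entry 7 */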
mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2644 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2645 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2646 NUM_BANKS(ADDR_SURF_16_BANK)); 2647 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2648 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2649 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2650 NUM_BANKS(ADDR_SURF_16_BANK)); 2651 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2652 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2653 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2654 NUM_BANKS(ADDR_SURF_16_BANK)); 2655 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2656 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2657 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2658 NUM_BANKS(ADDR_SURF_16_BANK)); 2659 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2660 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2661 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2662 NUM_BANKS(ADDR_SURF_16_BANK)); 2663 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2664 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2665 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2666 NUM_BANKS(ADDR_SURF_16_BANK)); 2667 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2668 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2669 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2670 NUM_BANKS(ADDR_SURF_16_BANK)); 2671 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2672 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2673 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2674 NUM_BANKS(ADDR_SURF_16_BANK)); 2675 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2676 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2677 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2678 NUM_BANKS(ADDR_SURF_16_BANK)); 2679 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2680 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2681 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2682 NUM_BANKS(ADDR_SURF_8_BANK)); 2683 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2684 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2685 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2686 NUM_BANKS(ADDR_SURF_4_BANK)); 2687 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2688 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2689 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2690 NUM_BANKS(ADDR_SURF_4_BANK)); 2691 2692 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2693 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2694 2695 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2696 if (reg_offset != 7) 2697 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2698 2699 break; 2700 case CHIP_POLARIS11: 2701 case CHIP_POLARIS12: 2702 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2703 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2704 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2705 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2706 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2707 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2708 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2709 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2710 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2711 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2712 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2713 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2714 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2715 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2716 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2717 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2718 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2719 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2720 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2721 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2722 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2723 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2724 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2725 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2726 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2727 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2728 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2729 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2730 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2731 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2732 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2733 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2734 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2735 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2736 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2737 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2738 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2739 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2740 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2741 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2742 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2743 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2744 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2745 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2746 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2747 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2748 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2749 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2750 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2751 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2752 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2753 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2754 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2755 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2756 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2757 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2758 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2759 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2760 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2761 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2762 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2763 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2764 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2765 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2766 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2767 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2768 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2769 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2770 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2771 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2772 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2773 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2774 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2775 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2776 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2777 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2778 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2779 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2780 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2781 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2782 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2783 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2784 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2785 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2786 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2787 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2788 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2789 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2790 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2791 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2792 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2793 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2794 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2795 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2796 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2797 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2798 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2799 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2800 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2801 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2802 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2803 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2804 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2805 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2806 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2807 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2808 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2809 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2810 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2811 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2812 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2813 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2814 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2815 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2816 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2817 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2818 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2819 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2820 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2821 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2822 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2823 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2824 2825 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2826 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2827 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2828 NUM_BANKS(ADDR_SURF_16_BANK)); 2829 2830 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2831 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2832 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2833 NUM_BANKS(ADDR_SURF_16_BANK)); 2834 2835 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2836 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2837 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2838 NUM_BANKS(ADDR_SURF_16_BANK)); 2839 2840 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2841 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2842 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2843 NUM_BANKS(ADDR_SURF_16_BANK)); 2844 2845 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2846 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2847 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2848 NUM_BANKS(ADDR_SURF_16_BANK)); 2849 2850 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2851 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2852 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2853 NUM_BANKS(ADDR_SURF_16_BANK)); 2854 2855 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2856 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2857 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2858 NUM_BANKS(ADDR_SURF_16_BANK)); 2859 2860 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2861 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2862 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2863 NUM_BANKS(ADDR_SURF_16_BANK)); 2864 2865 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2866 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2867 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2868 NUM_BANKS(ADDR_SURF_16_BANK)); 2869 2870 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2871 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2872 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2873 
NUM_BANKS(ADDR_SURF_16_BANK)); 2874 2875 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2876 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2877 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2878 NUM_BANKS(ADDR_SURF_16_BANK)); 2879 2880 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2881 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2882 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2883 NUM_BANKS(ADDR_SURF_16_BANK)); 2884 2885 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2886 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2887 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2888 NUM_BANKS(ADDR_SURF_8_BANK)); 2889 2890 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2891 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2892 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2893 NUM_BANKS(ADDR_SURF_4_BANK)); 2894 2895 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2896 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2897 2898 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2899 if (reg_offset != 7) 2900 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2901 2902 break; 2903 case CHIP_POLARIS10: 2904 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2905 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2906 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2907 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2908 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2909 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2910 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2911 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2912 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2913 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2914 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2915 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2916 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2917 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2918 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2919 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2920 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2921 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2922 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2923 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2924 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2925 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2926 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2927 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2928 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2929 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2930 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2931 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2932 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2933 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2934 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2935 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2936 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2937 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2938 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2939 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2940 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2941 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2942 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2943 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2944 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2945 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2946 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2947 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2948 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2949 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2950 modearray[12] = 
(ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2951 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2952 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2953 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2954 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2955 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2956 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2957 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2958 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2959 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2960 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2961 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2962 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2963 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2964 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2965 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2966 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2967 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2968 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2969 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2970 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2971 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2972 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2973 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2974 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2975 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2976 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2977 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2978 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2979 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2980 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2981 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2982 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2983 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2984 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2985 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2986 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2987 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2988 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2989 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2990 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2991 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2992 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2993 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2994 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2995 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2996 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2997 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2998 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2999 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3000 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3001 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3002 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3003 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3004 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3005 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3006 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3007 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3008 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3009 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3010 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3011 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3012 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3013 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3014 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3015 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3016 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3017 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3018 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3019 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 
3020 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3021 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3022 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3023 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3024 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3025 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3026 3027 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3028 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3029 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3030 NUM_BANKS(ADDR_SURF_16_BANK)); 3031 3032 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3033 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3034 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3035 NUM_BANKS(ADDR_SURF_16_BANK)); 3036 3037 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3038 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3039 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3040 NUM_BANKS(ADDR_SURF_16_BANK)); 3041 3042 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3043 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3044 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3045 NUM_BANKS(ADDR_SURF_16_BANK)); 3046 3047 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3048 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3049 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3050 NUM_BANKS(ADDR_SURF_16_BANK)); 3051 3052 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3053 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3054 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3055 NUM_BANKS(ADDR_SURF_16_BANK)); 3056 3057 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3058 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3059 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3060 NUM_BANKS(ADDR_SURF_16_BANK)); 3061 3062 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3063 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3064 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3065 NUM_BANKS(ADDR_SURF_16_BANK)); 3066 3067 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3068 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3069 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3070 NUM_BANKS(ADDR_SURF_16_BANK)); 3071 3072 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3073 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3074 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3075 NUM_BANKS(ADDR_SURF_16_BANK)); 3076 3077 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3078 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3079 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3080 NUM_BANKS(ADDR_SURF_16_BANK)); 3081 3082 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3083 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3084 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3085 NUM_BANKS(ADDR_SURF_8_BANK)); 3086 3087 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3088 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3089 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3090 NUM_BANKS(ADDR_SURF_4_BANK)); 3091 3092 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3093 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3094 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3095 NUM_BANKS(ADDR_SURF_4_BANK)); 3096 3097 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3098 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3099 3100 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3101 if (reg_offset != 7) 3102 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3103 3104 break; 3105 case CHIP_STONEY: 3106 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3107 PIPE_CONFIG(ADDR_SURF_P2) | 3108 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3109 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3110 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3111 PIPE_CONFIG(ADDR_SURF_P2) | 3112 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3113 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3114 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3115 PIPE_CONFIG(ADDR_SURF_P2) | 3116 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3117 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3118 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3119 PIPE_CONFIG(ADDR_SURF_P2) | 3120 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3121 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3122 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3123 PIPE_CONFIG(ADDR_SURF_P2) | 3124 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3125 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3126 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3127 PIPE_CONFIG(ADDR_SURF_P2) | 3128 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3129 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3130 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3131 PIPE_CONFIG(ADDR_SURF_P2) | 3132 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3133 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3134 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3135 PIPE_CONFIG(ADDR_SURF_P2)); 3136 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3137 PIPE_CONFIG(ADDR_SURF_P2) | 3138 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3139 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3140 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3141 PIPE_CONFIG(ADDR_SURF_P2) | 3142 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3143 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3144 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3145 PIPE_CONFIG(ADDR_SURF_P2) | 3146 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3147 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3148 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3149 PIPE_CONFIG(ADDR_SURF_P2) | 3150 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3151 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3152 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3153 PIPE_CONFIG(ADDR_SURF_P2) | 3154 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3155 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3156 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3157 PIPE_CONFIG(ADDR_SURF_P2) | 3158 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3159 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3160 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3161 PIPE_CONFIG(ADDR_SURF_P2) | 3162 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3163 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3164 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3165 PIPE_CONFIG(ADDR_SURF_P2) | 3166 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3167 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3168 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3169 PIPE_CONFIG(ADDR_SURF_P2) | 3170 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3171 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3172 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3173 PIPE_CONFIG(ADDR_SURF_P2) | 3174 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3175 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3176 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3177 PIPE_CONFIG(ADDR_SURF_P2) | 3178 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3179 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3180 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3181 PIPE_CONFIG(ADDR_SURF_P2) | 3182 
MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3184 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3185 PIPE_CONFIG(ADDR_SURF_P2) | 3186 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3187 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3188 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3189 PIPE_CONFIG(ADDR_SURF_P2) | 3190 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3191 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3192 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3193 PIPE_CONFIG(ADDR_SURF_P2) | 3194 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3195 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3196 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3197 PIPE_CONFIG(ADDR_SURF_P2) | 3198 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3199 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3200 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3201 PIPE_CONFIG(ADDR_SURF_P2) | 3202 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3203 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3204 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3205 PIPE_CONFIG(ADDR_SURF_P2) | 3206 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3207 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3208 3209 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3210 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3211 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3212 NUM_BANKS(ADDR_SURF_8_BANK)); 3213 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3214 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3215 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3216 NUM_BANKS(ADDR_SURF_8_BANK)); 3217 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3218 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3219 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3220 NUM_BANKS(ADDR_SURF_8_BANK)); 3221 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3222 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3223 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3224 NUM_BANKS(ADDR_SURF_8_BANK)); 3225 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3226 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3227 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3228 NUM_BANKS(ADDR_SURF_8_BANK)); 3229 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3230 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3231 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3232 NUM_BANKS(ADDR_SURF_8_BANK)); 3233 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3234 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3235 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3236 NUM_BANKS(ADDR_SURF_8_BANK)); 3237 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3238 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3239 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3240 NUM_BANKS(ADDR_SURF_16_BANK)); 3241 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3242 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3243 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3244 NUM_BANKS(ADDR_SURF_16_BANK)); 3245 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3246 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3247 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3248 NUM_BANKS(ADDR_SURF_16_BANK)); 3249 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3250 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3251 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3252 NUM_BANKS(ADDR_SURF_16_BANK)); 3253 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3254 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3255 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3256 NUM_BANKS(ADDR_SURF_16_BANK)); 3257 mod2array[13] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3258 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3259 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3260 NUM_BANKS(ADDR_SURF_16_BANK)); 3261 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3262 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3263 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3264 NUM_BANKS(ADDR_SURF_8_BANK)); 3265 3266 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3267 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3268 reg_offset != 23) 3269 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3270 3271 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3272 if (reg_offset != 7) 3273 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3274 3275 break; 3276 default: 3277 dev_warn(adev->dev, 3278 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3279 adev->asic_type); 3280 3281 case CHIP_CARRIZO: 3282 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3283 PIPE_CONFIG(ADDR_SURF_P2) | 3284 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3285 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3286 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3287 PIPE_CONFIG(ADDR_SURF_P2) | 3288 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3289 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3290 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3291 PIPE_CONFIG(ADDR_SURF_P2) | 3292 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3293 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3294 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3295 PIPE_CONFIG(ADDR_SURF_P2) | 3296 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3297 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3298 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3299 PIPE_CONFIG(ADDR_SURF_P2) | 3300 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3301 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3302 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3303 PIPE_CONFIG(ADDR_SURF_P2) | 3304 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3305 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3306 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3307 PIPE_CONFIG(ADDR_SURF_P2) | 3308 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3309 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3310 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3311 PIPE_CONFIG(ADDR_SURF_P2)); 3312 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3313 PIPE_CONFIG(ADDR_SURF_P2) | 3314 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3315 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3316 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3317 PIPE_CONFIG(ADDR_SURF_P2) | 3318 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3320 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3321 PIPE_CONFIG(ADDR_SURF_P2) | 3322 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3323 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3324 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3325 PIPE_CONFIG(ADDR_SURF_P2) | 3326 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3327 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3328 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3329 PIPE_CONFIG(ADDR_SURF_P2) | 3330 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3332 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3333 PIPE_CONFIG(ADDR_SURF_P2) | 3334 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3335 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3336 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3337 PIPE_CONFIG(ADDR_SURF_P2) | 3338 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3339 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3340 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3341 PIPE_CONFIG(ADDR_SURF_P2) | 3342 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3344 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3345 PIPE_CONFIG(ADDR_SURF_P2) | 3346 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3347 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3348 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3349 PIPE_CONFIG(ADDR_SURF_P2) | 3350 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3351 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3352 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3353 PIPE_CONFIG(ADDR_SURF_P2) | 3354 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3355 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3356 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3357 PIPE_CONFIG(ADDR_SURF_P2) | 3358 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3359 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3360 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3361 PIPE_CONFIG(ADDR_SURF_P2) | 3362 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3364 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3365 PIPE_CONFIG(ADDR_SURF_P2) | 3366 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3368 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3369 PIPE_CONFIG(ADDR_SURF_P2) | 3370 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3371 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3372 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3373 PIPE_CONFIG(ADDR_SURF_P2) | 3374 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3375 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3376 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3377 PIPE_CONFIG(ADDR_SURF_P2) | 3378 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3380 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3381 PIPE_CONFIG(ADDR_SURF_P2) | 3382 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3383 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3384 3385 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3386 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3387 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3388 NUM_BANKS(ADDR_SURF_8_BANK)); 3389 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3390 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3391 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3392 NUM_BANKS(ADDR_SURF_8_BANK)); 3393 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3394 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3395 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3396 NUM_BANKS(ADDR_SURF_8_BANK)); 3397 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3398 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3399 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3400 NUM_BANKS(ADDR_SURF_8_BANK)); 3401 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3402 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3403 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3404 NUM_BANKS(ADDR_SURF_8_BANK)); 3405 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3406 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3407 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3408 NUM_BANKS(ADDR_SURF_8_BANK)); 3409 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) 
| 3410 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3411 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3412 NUM_BANKS(ADDR_SURF_8_BANK)); 3413 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3414 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3415 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3416 NUM_BANKS(ADDR_SURF_16_BANK)); 3417 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3418 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3419 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3420 NUM_BANKS(ADDR_SURF_16_BANK)); 3421 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3422 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3423 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3424 NUM_BANKS(ADDR_SURF_16_BANK)); 3425 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3426 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3427 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3428 NUM_BANKS(ADDR_SURF_16_BANK)); 3429 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3430 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3431 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3432 NUM_BANKS(ADDR_SURF_16_BANK)); 3433 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3434 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3435 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3436 NUM_BANKS(ADDR_SURF_16_BANK)); 3437 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3438 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3439 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3440 NUM_BANKS(ADDR_SURF_8_BANK)); 3441 3442 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3443 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3444 reg_offset != 23) 3445 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3446 3447 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3448 if (reg_offset != 7) 3449 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3450 3451 break; 3452 } 3453 } 3454 3455 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, 3456 u32 se_num, u32 sh_num, u32 instance) 3457 { 3458 u32 data; 3459 3460 if (instance == 0xffffffff) 3461 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3462 else 3463 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 3464 3465 if (se_num == 0xffffffff) 3466 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3467 else 3468 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3469 3470 if (sh_num == 0xffffffff) 3471 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3472 else 3473 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3474 3475 WREG32(mmGRBM_GFX_INDEX, data); 3476 } 3477 3478 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev, 3479 u32 me, u32 pipe, u32 q) 3480 { 3481 vi_srbm_select(adev, me, pipe, q, 0); 3482 } 3483 3484 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3485 { 3486 u32 data, mask; 3487 3488 data = RREG32(mmCC_RB_BACKEND_DISABLE) | 3489 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3490 3491 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); 3492 3493 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 3494 adev->gfx.config.max_sh_per_se); 3495 3496 return (~data) & mask; 3497 } 3498 3499 static void 3500 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) 3501 { 3502 switch (adev->asic_type) { 3503 case CHIP_FIJI: 3504 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3505 RB_XSEL2(1) | PKR_MAP(2) | 3506 PKR_XSEL(1) | PKR_YSEL(1) | 
3507 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); 3508 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | 3509 SE_PAIR_YSEL(2); 3510 break; 3511 case CHIP_TONGA: 3512 case CHIP_POLARIS10: 3513 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3514 SE_XSEL(1) | SE_YSEL(1); 3515 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | 3516 SE_PAIR_YSEL(2); 3517 break; 3518 case CHIP_TOPAZ: 3519 case CHIP_CARRIZO: 3520 *rconf |= RB_MAP_PKR0(2); 3521 *rconf1 |= 0x0; 3522 break; 3523 case CHIP_POLARIS11: 3524 case CHIP_POLARIS12: 3525 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3526 SE_XSEL(1) | SE_YSEL(1); 3527 *rconf1 |= 0x0; 3528 break; 3529 case CHIP_STONEY: 3530 *rconf |= 0x0; 3531 *rconf1 |= 0x0; 3532 break; 3533 default: 3534 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 3535 break; 3536 } 3537 } 3538 3539 static void 3540 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, 3541 u32 raster_config, u32 raster_config_1, 3542 unsigned rb_mask, unsigned num_rb) 3543 { 3544 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); 3545 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); 3546 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); 3547 unsigned rb_per_se = num_rb / num_se; 3548 unsigned se_mask[4]; 3549 unsigned se; 3550 3551 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3552 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3553 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 3554 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3555 3556 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); 3557 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); 3558 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); 3559 3560 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3561 (!se_mask[2] && !se_mask[3]))) { 3562 raster_config_1 &= ~SE_PAIR_MAP_MASK; 3563 3564 if (!se_mask[0] && !se_mask[1]) { 3565 raster_config_1 |= 3566 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); 3567 } else { 3568 raster_config_1 |= 3569 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); 3570 } 3571 } 3572 3573 for (se = 0; se < num_se; se++) { 3574 unsigned raster_config_se = raster_config; 3575 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3576 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3577 int idx = (se / 2) * 2; 3578 3579 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3580 raster_config_se &= ~SE_MAP_MASK; 3581 3582 if (!se_mask[idx]) { 3583 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); 3584 } else { 3585 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); 3586 } 3587 } 3588 3589 pkr0_mask &= rb_mask; 3590 pkr1_mask &= rb_mask; 3591 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3592 raster_config_se &= ~PKR_MAP_MASK; 3593 3594 if (!pkr0_mask) { 3595 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); 3596 } else { 3597 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); 3598 } 3599 } 3600 3601 if (rb_per_se >= 2) { 3602 unsigned rb0_mask = 1 << (se * rb_per_se); 3603 unsigned rb1_mask = rb0_mask << 1; 3604 3605 rb0_mask &= rb_mask; 3606 rb1_mask &= rb_mask; 3607 if (!rb0_mask || !rb1_mask) { 3608 raster_config_se &= ~RB_MAP_PKR0_MASK; 3609 3610 if (!rb0_mask) { 3611 raster_config_se |= 3612 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); 3613 } else { 3614 raster_config_se |= 3615 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); 3616 } 3617 } 3618 3619 if (rb_per_se > 2) { 3620 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3621 rb1_mask = rb0_mask << 1; 3622 rb0_mask &= rb_mask; 3623 rb1_mask &= rb_mask; 3624 if (!rb0_mask || 
                                    !rb1_mask) {
                                        raster_config_se &= ~RB_MAP_PKR1_MASK;

                                        if (!rb0_mask) {
                                                raster_config_se |=
                                                        RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
                                        } else {
                                                raster_config_se |=
                                                        RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
                                        }
                                }
                        }
                }

                /* GRBM_GFX_INDEX has a different offset on VI */
                gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
                WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
                WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
        }

        /* GRBM_GFX_INDEX has a different offset on VI */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}

static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
        int i, j;
        u32 data;
        u32 raster_config = 0, raster_config_1 = 0;
        u32 active_rbs = 0;
        u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
                                        adev->gfx.config.max_sh_per_se;
        unsigned num_rb_pipes;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        data = gfx_v8_0_get_rb_active_bitmap(adev);
                        active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
                                               rb_bitmap_width_per_sh);
                }
        }
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

        adev->gfx.config.backend_enable_mask = active_rbs;
        adev->gfx.config.num_rbs = hweight32(active_rbs);

        num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
                             adev->gfx.config.max_shader_engines, 16);

        gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

        if (!adev->gfx.config.backend_enable_mask ||
                        adev->gfx.config.num_rbs >= num_rb_pipes) {
                WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
                WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
        } else {
                gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
                                                        adev->gfx.config.backend_enable_mask,
                                                        num_rb_pipes);
        }

        /* cache the values for userspace */
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        adev->gfx.config.rb_config[i][j].rb_backend_disable =
                                RREG32(mmCC_RB_BACKEND_DISABLE);
                        adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
                                RREG32(mmGC_USER_RB_BACKEND_DISABLE);
                        adev->gfx.config.rb_config[i][j].raster_config =
                                RREG32(mmPA_SC_RASTER_CONFIG);
                        adev->gfx.config.rb_config[i][j].raster_config_1 =
                                RREG32(mmPA_SC_RASTER_CONFIG_1);
                }
        }
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);
}
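
/*
 * Worked illustration of the bitmap packing done in gfx_v8_0_setup_rb()
 * above (assumed example values, not taken from any particular ASIC):
 * with max_backends_per_se = 4 and max_sh_per_se = 2,
 * rb_bitmap_width_per_sh is 2, so SE0/SH0 lands in bits 1:0, SE0/SH1 in
 * bits 3:2, SE1/SH0 in bits 5:4, and so on.  A fully populated two-SE
 * part would then report active_rbs = 0xff and
 * num_rbs = hweight32(0xff) = 8.
 */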

/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM config and aperture registers for the
 * compute-reserved VMIDs (FIRST_COMPUTE_VMID..LAST_COMPUTE_VMID - 1).
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
        int i;
        uint32_t sh_mem_config;
        uint32_t sh_mem_bases;

        /*
         * Configure apertures:
         * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
         * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
         * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
         */
        sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

        sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
                        SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
                        SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
                        SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
                        MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
                        SH_MEM_CONFIG__PRIVATE_ATC_MASK;

        mutex_lock(&adev->srbm_mutex);
        for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
                vi_srbm_select(adev, 0, 0, 0, i);
                /* CP and shaders */
                WREG32(mmSH_MEM_CONFIG, sh_mem_config);
                WREG32(mmSH_MEM_APE1_BASE, 1);
                WREG32(mmSH_MEM_APE1_LIMIT, 0);
                WREG32(mmSH_MEM_BASES, sh_mem_bases);
        }
        vi_srbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);
}
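
/*
 * Unpacking the value written above (a reading of the code, not a quote
 * from a register spec): DEFAULT_SH_MEM_BASES is 0x6000, so
 * sh_mem_bases becomes 0x60006000 and both halves of SH_MEM_BASES
 * resolve to 0x6000 << 48 = 0x6000'0000'0000'0000, which is exactly
 * where the aperture comment in gfx_v8_0_init_compute_vmid() places
 * the LDS aperture.
 */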

static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        default:
                adev->gfx.config.double_offchip_lds_buf = 1;
                break;
        case CHIP_CARRIZO:
        case CHIP_STONEY:
                adev->gfx.config.double_offchip_lds_buf = 0;
                break;
        }
}

static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
        u32 tmp, sh_static_mem_cfg;
        int i;

        WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
        WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
        WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
        WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

        gfx_v8_0_tiling_mode_table_init(adev);
        gfx_v8_0_setup_rb(adev);
        gfx_v8_0_get_cu_info(adev);
        gfx_v8_0_config_init(adev);

        /* XXX SH_MEM regs */
        /* where to put LDS, scratch, GPUVM in FSA64 space */
        sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
                                          SWIZZLE_ENABLE, 1);
        sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
                                          ELEMENT_SIZE, 1);
        sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
                                          INDEX_STRIDE, 3);
        WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

        mutex_lock(&adev->srbm_mutex);
        for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
                vi_srbm_select(adev, 0, 0, 0, i);
                /* CP and shaders */
                if (i == 0) {
                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
                        WREG32(mmSH_MEM_CONFIG, tmp);
                        WREG32(mmSH_MEM_BASES, 0);
                } else {
                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
                                            SH_MEM_ALIGNMENT_MODE_UNALIGNED);
                        WREG32(mmSH_MEM_CONFIG, tmp);
                        tmp = adev->gmc.shared_aperture_start >> 48;
                        WREG32(mmSH_MEM_BASES, tmp);
                }

                WREG32(mmSH_MEM_APE1_BASE, 1);
                WREG32(mmSH_MEM_APE1_LIMIT, 0);
        }
        vi_srbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);

        gfx_v8_0_init_compute_vmid(adev);

        mutex_lock(&adev->grbm_idx_mutex);
        /*
         * making sure that the following register writes will be broadcast
         * to all the shaders
         */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

        WREG32(mmPA_SC_FIFO_SIZE,
               (adev->gfx.config.sc_prim_fifo_size_frontend <<
                        PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
               (adev->gfx.config.sc_prim_fifo_size_backend <<
                        PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
               (adev->gfx.config.sc_hiz_tile_fifo_size <<
                        PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
               (adev->gfx.config.sc_earlyz_tile_fifo_size <<
                        PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

        tmp = RREG32(mmSPI_ARB_PRIORITY);
        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
        tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
        WREG32(mmSPI_ARB_PRIORITY, tmp);

        mutex_unlock(&adev->grbm_idx_mutex);
}
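
/*
 * Note on the wait helper below: it polls each SE/SH instance for up to
 * adev->usec_timeout microseconds (udelay(1) per iteration), restores
 * broadcast mode, then polls the non-CU serdes masters the same way.
 * The early return on timeout is informational only; callers do not
 * receive an error code.
 */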
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
        u32 i, j, k;
        u32 mask;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        for (k = 0; k < adev->usec_timeout; k++) {
                                if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
                                        break;
                                udelay(1);
                        }
                        if (k == adev->usec_timeout) {
                                gfx_v8_0_select_se_sh(adev, 0xffffffff,
                                                      0xffffffff, 0xffffffff);
                                mutex_unlock(&adev->grbm_idx_mutex);
                                DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
                                         i, j);
                                return;
                        }
                }
        }
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
                RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
                RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
                RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
        for (k = 0; k < adev->usec_timeout; k++) {
                if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
                        break;
                udelay(1);
        }
}

static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
                                               bool enable)
{
        u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
        tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

        WREG32(mmCP_INT_CNTL_RING0, tmp);
}

static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
        /* csib */
        WREG32(mmRLC_CSIB_ADDR_HI,
               adev->gfx.rlc.clear_state_gpu_addr >> 32);
        WREG32(mmRLC_CSIB_ADDR_LO,
               adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
        WREG32(mmRLC_CSIB_LENGTH,
               adev->gfx.rlc.clear_state_size);
}

static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
                                int ind_offset,
                                int list_size,
                                int *unique_indices,
                                int *indices_count,
                                int max_indices,
                                int *ind_start_offsets,
                                int *offset_count,
                                int max_offset)
{
        int indices;
        bool new_entry = true;

        for (; ind_offset < list_size; ind_offset++) {

                if (new_entry) {
                        new_entry = false;
                        ind_start_offsets[*offset_count] = ind_offset;
                        *offset_count = *offset_count + 1;
                        BUG_ON(*offset_count >= max_offset);
                }

                if (register_list_format[ind_offset] == 0xFFFFFFFF) {
                        new_entry = true;
                        continue;
                }

                ind_offset += 2;

                /* look for the matching index */
                for (indices = 0;
                        indices < *indices_count;
                        indices++) {
                        if (unique_indices[indices] ==
                                register_list_format[ind_offset])
                                break;
                }

                if (indices >= *indices_count) {
                        unique_indices[*indices_count] =
                                register_list_format[ind_offset];
                        indices = *indices_count;
                        *indices_count = *indices_count + 1;
                        BUG_ON(*indices_count >= max_indices);
                }

                register_list_format[ind_offset] = indices;
        }
}
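
/*
 * How the parser above walks the list (inferred from the code; the
 * firmware format itself is not documented here): 0xFFFFFFFF acts as an
 * end-of-entry marker, each entry's third dword names a register index,
 * and that dword is rewritten in place to a slot number in
 * unique_indices[].  Two entries naming the same register therefore end
 * up pointing at the same slot, which is what lets the eight
 * RLC_SRM_INDEX_CNTL_* register pairs programmed below cover the whole
 * list.
 */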
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
        int i, temp, data;
        int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
        int indices_count = 0;
        int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
        int offset_count = 0;

        int list_size;
        unsigned int *register_list_format =
                kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
        if (!register_list_format)
                return -ENOMEM;
        memcpy(register_list_format, adev->gfx.rlc.register_list_format,
               adev->gfx.rlc.reg_list_format_size_bytes);

        gfx_v8_0_parse_ind_reg_list(register_list_format,
                                RLC_FormatDirectRegListLength,
                                adev->gfx.rlc.reg_list_format_size_bytes >> 2,
                                unique_indices,
                                &indices_count,
                                ARRAY_SIZE(unique_indices),
                                indirect_start_offsets,
                                &offset_count,
                                ARRAY_SIZE(indirect_start_offsets));

        /* save and restore list */
        WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

        WREG32(mmRLC_SRM_ARAM_ADDR, 0);
        for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
                WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

        /* indirect list */
        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
        for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
                WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

        list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
        list_size = list_size >> 1;
        WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
        WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

        /* starting offsets */
        WREG32(mmRLC_GPM_SCRATCH_ADDR,
               adev->gfx.rlc.starting_offsets_start);
        for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
                WREG32(mmRLC_GPM_SCRATCH_DATA,
                       indirect_start_offsets[i]);

        /* unique indices */
        temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
        data = mmRLC_SRM_INDEX_CNTL_DATA_0;
        for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
                if (unique_indices[i] != 0) {
                        WREG32(temp + i, unique_indices[i] & 0x3FFFF);
                        WREG32(data + i, unique_indices[i] >> 20);
                }
        }
        kfree(register_list_format);

        return 0;
}

static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}

static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
        uint32_t data;

        WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

        data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
        data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
        data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
        data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
        WREG32(mmRLC_PG_DELAY, data);

        WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
        WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
}

static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
                                                bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
                                                  bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}

static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
        if ((adev->asic_type == CHIP_CARRIZO) ||
            (adev->asic_type == CHIP_STONEY)) {
                gfx_v8_0_init_csb(adev);
                gfx_v8_0_init_save_restore_list(adev);
                gfx_v8_0_enable_save_restore_machine(adev);
                WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
                gfx_v8_0_init_power_gating(adev);
                WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
        } else if ((adev->asic_type == CHIP_POLARIS11) ||
                   (adev->asic_type == CHIP_POLARIS12)) {
                gfx_v8_0_init_csb(adev);
                gfx_v8_0_init_save_restore_list(adev);
                gfx_v8_0_enable_save_restore_machine(adev);
                gfx_v8_0_init_power_gating(adev);
        }
}

static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

        gfx_v8_0_enable_gui_idle_interrupt(adev, false);
        gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
        udelay(50);

        WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
        udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

        /* on Carrizo the CP interrupt is only enabled after the CP is inited */
        if (!(adev->flags & AMD_IS_APU))
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);

        udelay(50);
}
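
/*
 * The microcode loaders below all follow the same ADDR/DATA streaming
 * protocol: write 0 to the *_UCODE_ADDR register to reset the write
 * pointer, stream every little-endian dword of the image into
 * *_UCODE_DATA, then write the firmware version back to *_UCODE_ADDR.
 * (A summary of the code that follows, not of any firmware spec.)
 */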
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
        const struct rlc_firmware_header_v2_0 *hdr;
        const __le32 *fw_data;
        unsigned i, fw_size;

        if (!adev->gfx.rlc_fw)
                return -EINVAL;

        hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
        amdgpu_ucode_print_rlc_hdr(&hdr->header);

        fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
                           le32_to_cpu(hdr->header.ucode_array_offset_bytes));
        fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

        WREG32(mmRLC_GPM_UCODE_ADDR, 0);
        for (i = 0; i < fw_size; i++)
                WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
        WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

        return 0;
}

static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
        int r;
        u32 tmp;

        gfx_v8_0_rlc_stop(adev);

        /* disable CG */
        tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
        tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
        WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
        if (adev->asic_type == CHIP_POLARIS11 ||
            adev->asic_type == CHIP_POLARIS10 ||
            adev->asic_type == CHIP_POLARIS12) {
                tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
                tmp &= ~0x3;
                WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
        }

        /* disable PG */
        WREG32(mmRLC_PG_CNTL, 0);

        gfx_v8_0_rlc_reset(adev);
        gfx_v8_0_init_pg(adev);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
                /* legacy rlc firmware loading */
                r = gfx_v8_0_rlc_load_microcode(adev);
                if (r)
                        return r;
        }

        gfx_v8_0_rlc_start(adev);

        return 0;
}

static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
        int i;
        u32 tmp = RREG32(mmCP_ME_CNTL);

        if (enable) {
                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
        } else {
                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
                tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
                for (i = 0; i < adev->gfx.num_gfx_rings; i++)
                        adev->gfx.gfx_ring[i].ready = false;
        }
        WREG32(mmCP_ME_CNTL, tmp);
        udelay(50);
}

static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
        const struct gfx_firmware_header_v1_0 *pfp_hdr;
        const struct gfx_firmware_header_v1_0 *ce_hdr;
        const struct gfx_firmware_header_v1_0 *me_hdr;
        const __le32 *fw_data;
        unsigned i, fw_size;

        if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
                return -EINVAL;

        pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
                adev->gfx.pfp_fw->data;
        ce_hdr = (const struct gfx_firmware_header_v1_0 *)
                adev->gfx.ce_fw->data;
        me_hdr = (const struct gfx_firmware_header_v1_0 *)
                adev->gfx.me_fw->data;

        amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
        amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
        amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

        gfx_v8_0_cp_gfx_enable(adev, false);

        /* PFP */
        fw_data = (const __le32 *)
                (adev->gfx.pfp_fw->data +
                 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
        fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
        WREG32(mmCP_PFP_UCODE_ADDR, 0);
        for (i = 0; i < fw_size; i++)
                WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
        WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

        /* CE */
        fw_data = (const __le32 *)
                (adev->gfx.ce_fw->data +
                 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
        fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
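        /* the CE and ME loads below repeat the PFP sequence above */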
        WREG32(mmCP_CE_UCODE_ADDR, 0);
        for (i = 0; i < fw_size; i++)
                WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
        WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

        /* ME */
        fw_data = (const __le32 *)
                (adev->gfx.me_fw->data +
                 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
        fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
        WREG32(mmCP_ME_RAM_WADDR, 0);
        for (i = 0; i < fw_size; i++)
                WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
        WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

        return 0;
}

static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
        u32 count = 0;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        /* begin clear state */
        count += 2;
        /* context control state */
        count += 3;

        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT)
                                count += 2 + ext->reg_count;
                        else
                                return 0;
                }
        }
        /* pa_sc_raster_config/pa_sc_raster_config1 */
        count += 4;
        /* end clear state */
        count += 2;
        /* clear state */
        count += 2;

        return count;
}

static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;
        int r, i;

        /* init the CP */
        WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
        WREG32(mmCP_ENDIAN_SWAP, 0);
        WREG32(mmCP_DEVICE_ID, 1);

        gfx_v8_0_cp_gfx_enable(adev, true);

        r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
                return r;
        }

        /* clear state buffer */
        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, 0x80000000);
        amdgpu_ring_write(ring, 0x80000000);

        for (sect = vi_cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                amdgpu_ring_write(ring,
                                       PACKET3(PACKET3_SET_CONTEXT_REG,
                                               ext->reg_count));
                                amdgpu_ring_write(ring,
                                       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        amdgpu_ring_write(ring, ext->extent[i]);
                        }
                }
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
        amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
        amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        amdgpu_ring_write(ring, 0);

        /* init the CE partitions */
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
        amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
        amdgpu_ring_write(ring, 0x8000);
        amdgpu_ring_write(ring, 0x8000);

        amdgpu_ring_commit(ring);

        return 0;
}
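
/*
 * Hedged sketch of the doorbell plumbing handled below (inferred from
 * this file, not from a hardware spec): when ring->use_doorbell is set,
 * the ring's write pointer is published through a doorbell page write
 * rather than an MMIO write to CP_RB0_WPTR, and the
 * CP_RB_DOORBELL_RANGE_LOWER/UPPER pair tells the CP which doorbell
 * offsets belong to the gfx ring.
 */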
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
        u32 tmp;
        /* no gfx doorbells on iceland */
        if (adev->asic_type == CHIP_TOPAZ)
                return;

        tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

        if (ring->use_doorbell) {
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                    DOORBELL_OFFSET, ring->doorbell_index);
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                    DOORBELL_HIT, 0);
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                    DOORBELL_EN, 1);
        } else {
                tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
        }

        WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

        if (adev->flags & AMD_IS_APU)
                return;

        tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
                            DOORBELL_RANGE_LOWER,
                            AMDGPU_DOORBELL_GFX_RING0);
        WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

        WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
               CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}

static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        u32 tmp;
        u32 rb_bufsz;
        u64 rb_addr, rptr_addr, wptr_gpu_addr;
        int r;

        /* Set the write pointer delay */
        WREG32(mmCP_RB_WPTR_DELAY, 0);

        /* set the RB to use vmid 0 */
        WREG32(mmCP_RB_VMID, 0);

        /* Set ring buffer size */
        ring = &adev->gfx.gfx_ring[0];
        rb_bufsz = order_base_2(ring->ring_size / 8);
        tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
        WREG32(mmCP_RB0_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
        ring->wptr = 0;
        WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

        /* set the wb address whether it's enabled or not */
        rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
        WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

        wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
        WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
        mdelay(1);
        WREG32(mmCP_RB0_CNTL, tmp);

        rb_addr = ring->gpu_addr >> 8;
        WREG32(mmCP_RB0_BASE, rb_addr);
        WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

        gfx_v8_0_set_cpg_door_bell(adev, ring);
        /* start the ring */
        amdgpu_ring_clear_ring(ring);
        gfx_v8_0_cp_gfx_start(adev);
        ring->ready = true;
        r = amdgpu_ring_test_ring(ring);
        if (r)
                ring->ready = false;

        return r;
}

static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
        int i;

        if (enable) {
                WREG32(mmCP_MEC_CNTL, 0);
        } else {
                WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
                for (i = 0; i < adev->gfx.num_compute_rings; i++)
                        adev->gfx.compute_ring[i].ready = false;
                adev->gfx.kiq.ring.ready = false;
        }
        udelay(50);
}

static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
        const struct
gfx_firmware_header_v1_0 *mec_hdr; 4450 const __le32 *fw_data; 4451 unsigned i, fw_size; 4452 4453 if (!adev->gfx.mec_fw) 4454 return -EINVAL; 4455 4456 gfx_v8_0_cp_compute_enable(adev, false); 4457 4458 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4459 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4460 4461 fw_data = (const __le32 *) 4462 (adev->gfx.mec_fw->data + 4463 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4464 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4465 4466 /* MEC1 */ 4467 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4468 for (i = 0; i < fw_size; i++) 4469 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4470 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4471 4472 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 4473 if (adev->gfx.mec2_fw) { 4474 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4475 4476 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4477 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4478 4479 fw_data = (const __le32 *) 4480 (adev->gfx.mec2_fw->data + 4481 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4482 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4483 4484 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4485 for (i = 0; i < fw_size; i++) 4486 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4487 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4488 } 4489 4490 return 0; 4491 } 4492 4493 /* KIQ functions */ 4494 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) 4495 { 4496 uint32_t tmp; 4497 struct amdgpu_device *adev = ring->adev; 4498 4499 /* tell RLC which is KIQ queue */ 4500 tmp = RREG32(mmRLC_CP_SCHEDULERS); 4501 tmp &= 0xffffff00; 4502 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4503 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4504 tmp |= 0x80; 4505 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4506 } 4507 4508 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) 4509 { 4510 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 4511 uint32_t scratch, tmp = 0; 4512 uint64_t queue_mask = 0; 4513 int r, i; 4514 4515 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 4516 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 4517 continue; 4518 4519 /* This situation may be hit in the future if a new HW 4520 * generation exposes more than 64 queues. 
If so, the 4521 * definition of queue_mask needs updating */ 4522 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 4523 DRM_ERROR("Invalid KCQ index: %d\n", i); 4524 break; 4525 } 4526 4527 queue_mask |= (1ull << i); 4528 } 4529 4530 r = amdgpu_gfx_scratch_get(adev, &scratch); 4531 if (r) { 4532 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 4533 return r; 4534 } 4535 WREG32(scratch, 0xCAFEDEAD); 4536 4537 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); 4538 if (r) { 4539 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 4540 amdgpu_gfx_scratch_free(adev, scratch); 4541 return r; 4542 } 4543 /* set resources */ 4544 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 4545 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ 4546 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 4547 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 4548 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 4549 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 4550 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 4551 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 4552 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4553 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4554 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 4555 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4556 4557 /* map queues */ 4558 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 4559 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */ 4560 amdgpu_ring_write(kiq_ring, 4561 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); 4562 amdgpu_ring_write(kiq_ring, 4563 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | 4564 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 4565 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 4566 PACKET3_MAP_QUEUES_ME(ring->me == 1 ?
0 : 1)); /* doorbell */ 4567 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 4568 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 4569 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 4570 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4571 } 4572 /* write to scratch for completion */ 4573 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 4574 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 4575 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 4576 amdgpu_ring_commit(kiq_ring); 4577 4578 for (i = 0; i < adev->usec_timeout; i++) { 4579 tmp = RREG32(scratch); 4580 if (tmp == 0xDEADBEEF) 4581 break; 4582 DRM_UDELAY(1); 4583 } 4584 if (i >= adev->usec_timeout) { 4585 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", 4586 scratch, tmp); 4587 r = -EINVAL; 4588 } 4589 amdgpu_gfx_scratch_free(adev, scratch); 4590 4591 return r; 4592 } 4593 4594 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) 4595 { 4596 int i, r = 0; 4597 4598 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 4599 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); 4600 for (i = 0; i < adev->usec_timeout; i++) { 4601 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 4602 break; 4603 udelay(1); 4604 } 4605 if (i == adev->usec_timeout) 4606 r = -ETIMEDOUT; 4607 } 4608 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 4609 WREG32(mmCP_HQD_PQ_RPTR, 0); 4610 WREG32(mmCP_HQD_PQ_WPTR, 0); 4611 4612 return r; 4613 } 4614 4615 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) 4616 { 4617 struct amdgpu_device *adev = ring->adev; 4618 struct vi_mqd *mqd = ring->mqd_ptr; 4619 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4620 uint32_t tmp; 4621 4622 mqd->header = 0xC0310800; 4623 mqd->compute_pipelinestat_enable = 0x00000001; 4624 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4625 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4626 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4627 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4628 mqd->compute_misc_reserved = 0x00000003; 4629 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr 4630 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4631 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr 4632 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4633 eop_base_addr = ring->eop_gpu_addr >> 8; 4634 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4635 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4636 4637 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4638 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4639 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4640 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); 4641 4642 mqd->cp_hqd_eop_control = tmp; 4643 4644 /* enable doorbell? */ 4645 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), 4646 CP_HQD_PQ_DOORBELL_CONTROL, 4647 DOORBELL_EN, 4648 ring->use_doorbell ? 
1 : 0); 4649 4650 mqd->cp_hqd_pq_doorbell_control = tmp; 4651 4652 /* set the pointer to the MQD */ 4653 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 4654 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 4655 4656 /* set MQD vmid to 0 */ 4657 tmp = RREG32(mmCP_MQD_CONTROL); 4658 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4659 mqd->cp_mqd_control = tmp; 4660 4661 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */ 4662 hqd_gpu_addr = ring->gpu_addr >> 8; 4663 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4664 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4665 4666 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4667 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4668 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4669 (order_base_2(ring->ring_size / 4) - 1)); 4670 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4671 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4672 #ifdef __BIG_ENDIAN 4673 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4674 #endif 4675 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4676 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4677 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4678 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4679 mqd->cp_hqd_pq_control = tmp; 4680 4681 /* set the wb address whether it's enabled or not */ 4682 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4683 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4684 mqd->cp_hqd_pq_rptr_report_addr_hi = 4685 upper_32_bits(wb_gpu_addr) & 0xffff; 4686 4687 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4688 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4689 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4690 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4691 4692 tmp = 0; 4693 /* enable the doorbell if requested */ 4694 if (ring->use_doorbell) { 4695 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4696 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4697 DOORBELL_OFFSET, ring->doorbell_index); 4698 4699 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4700 DOORBELL_EN, 1); 4701 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4702 DOORBELL_SOURCE, 0); 4703 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4704 DOORBELL_HIT, 0); 4705 } 4706 4707 mqd->cp_hqd_pq_doorbell_control = tmp; 4708 4709 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4710 ring->wptr = 0; 4711 mqd->cp_hqd_pq_wptr = ring->wptr; 4712 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4713 4714 /* set the vmid for the queue */ 4715 mqd->cp_hqd_vmid = 0; 4716 4717 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4718 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4719 mqd->cp_hqd_persistent_state = tmp; 4720 4721 /* set MTYPE */ 4722 tmp = RREG32(mmCP_HQD_IB_CONTROL); 4723 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4724 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); 4725 mqd->cp_hqd_ib_control = tmp; 4726 4727 tmp = RREG32(mmCP_HQD_IQ_TIMER); 4728 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); 4729 mqd->cp_hqd_iq_timer = tmp; 4730 4731 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); 4732 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); 4733 mqd->cp_hqd_ctx_save_control = tmp; 4734 4735 /* defaults */ 4736 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); 4737 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4738 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); 4739 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); 4740 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); 4741 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); 4742 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); 4743 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); 4744 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); 4745 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); 4746 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); 4747 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); 4748 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); 4749 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); 4750 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); 4751 4752 /* activate the queue */ 4753 mqd->cp_hqd_active = 1; 4754 4755 return 0; 4756 } 4757 4758 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 4759 struct vi_mqd *mqd) 4760 { 4761 uint32_t mqd_reg; 4762 uint32_t *mqd_data; 4763 4764 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ 4765 mqd_data = &mqd->cp_mqd_base_addr_lo; 4766 4767 /* disable wptr polling */ 4768 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); 4769 4770 /* program all HQD registers */ 4771 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++) 4772 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4773 4774 /* Tonga errata: EOP RPTR/WPTR should be left unmodified. 4775 * This is safe since EOP RPTR==WPTR for any inactive HQD 4776 * on ASICs that do not support context-save. 4777 * EOP writes/reads can start anywhere in the ring. 4778 */ 4779 if (adev->asic_type != CHIP_TONGA) { 4780 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); 4781 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); 4782 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); 4783 } 4784 4785 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) 4786 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4787 4788 /* activate the HQD */ 4789 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) 4790 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4791 4792 return 0; 4793 } 4794 4795 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) 4796 { 4797 struct amdgpu_device *adev = ring->adev; 4798 struct vi_mqd *mqd = ring->mqd_ptr; 4799 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 4800 4801 gfx_v8_0_kiq_setting(ring); 4802 4803 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4804 /* reset MQD to a clean status */ 4805 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4806 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4807 4808 /* reset ring buffer */ 4809 ring->wptr = 0; 4810 amdgpu_ring_clear_ring(ring); 4811 mutex_lock(&adev->srbm_mutex); 4812 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4813 gfx_v8_0_mqd_commit(adev, mqd); 4814 vi_srbm_select(adev, 0, 0, 0, 0); 4815 mutex_unlock(&adev->srbm_mutex); 4816 } else { 4817 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4818 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4819 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4820 mutex_lock(&adev->srbm_mutex); 4821 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4822 gfx_v8_0_mqd_init(ring); 4823 gfx_v8_0_mqd_commit(adev, mqd); 4824 vi_srbm_select(adev, 0, 0, 0, 0); 4825 mutex_unlock(&adev->srbm_mutex); 4826 4827 if 
(adev->gfx.mec.mqd_backup[mqd_idx]) 4828 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4829 } 4830 4831 return 0; 4832 } 4833 4834 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) 4835 { 4836 struct amdgpu_device *adev = ring->adev; 4837 struct vi_mqd *mqd = ring->mqd_ptr; 4838 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4839 4840 if (!adev->in_gpu_reset && !adev->gfx.in_suspend) { 4841 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4842 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4843 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4844 mutex_lock(&adev->srbm_mutex); 4845 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4846 gfx_v8_0_mqd_init(ring); 4847 vi_srbm_select(adev, 0, 0, 0, 0); 4848 mutex_unlock(&adev->srbm_mutex); 4849 4850 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4851 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4852 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4853 /* reset MQD to a clean status */ 4854 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4855 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4856 /* reset ring buffer */ 4857 ring->wptr = 0; 4858 amdgpu_ring_clear_ring(ring); 4859 } else { 4860 amdgpu_ring_clear_ring(ring); 4861 } 4862 return 0; 4863 } 4864 4865 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) 4866 { 4867 if (adev->asic_type > CHIP_TONGA) { 4868 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); 4869 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); 4870 } 4871 /* enable doorbells */ 4872 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4873 } 4874 4875 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) 4876 { 4877 struct amdgpu_ring *ring = NULL; 4878 int r = 0, i; 4879 4880 gfx_v8_0_cp_compute_enable(adev, true); 4881 4882 ring = &adev->gfx.kiq.ring; 4883 4884 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4885 if (unlikely(r != 0)) 4886 goto done; 4887 4888 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4889 if (!r) { 4890 r = gfx_v8_0_kiq_init_queue(ring); 4891 amdgpu_bo_kunmap(ring->mqd_obj); 4892 ring->mqd_ptr = NULL; 4893 } 4894 amdgpu_bo_unreserve(ring->mqd_obj); 4895 if (r) 4896 goto done; 4897 4898 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4899 ring = &adev->gfx.compute_ring[i]; 4900 4901 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4902 if (unlikely(r != 0)) 4903 goto done; 4904 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4905 if (!r) { 4906 r = gfx_v8_0_kcq_init_queue(ring); 4907 amdgpu_bo_kunmap(ring->mqd_obj); 4908 ring->mqd_ptr = NULL; 4909 } 4910 amdgpu_bo_unreserve(ring->mqd_obj); 4911 if (r) 4912 goto done; 4913 } 4914 4915 gfx_v8_0_set_mec_doorbell_range(adev); 4916 4917 r = gfx_v8_0_kiq_kcq_enable(adev); 4918 if (r) 4919 goto done; 4920 4921 /* Test KIQ */ 4922 ring = &adev->gfx.kiq.ring; 4923 ring->ready = true; 4924 r = amdgpu_ring_test_ring(ring); 4925 if (r) { 4926 ring->ready = false; 4927 goto done; 4928 } 4929 4930 /* Test KCQs */ 4931 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4932 ring = &adev->gfx.compute_ring[i]; 4933 ring->ready = true; 4934 r = amdgpu_ring_test_ring(ring); 4935 if (r) 4936 ring->ready = false; 4937 } 4938 4939 done: 4940 return r; 4941 } 4942 4943 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 4944 { 4945 int r; 4946 4947 if (!(adev->flags & AMD_IS_APU)) 4948 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4949 
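/* Note on the branch below: with AMDGPU_FW_LOAD_DIRECT the driver itself streams the CP microcode through the *_UCODE_ADDR/_UCODE_DATA register pairs (see gfx_v8_0_cp_compute_load_microcode() above); for the other load types the blobs are presumably staged earlier, e.g. by the SMU during ucode init, so only the resume sequence below is needed here. */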
4950 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4951 /* legacy firmware loading */ 4952 r = gfx_v8_0_cp_gfx_load_microcode(adev); 4953 if (r) 4954 return r; 4955 4956 r = gfx_v8_0_cp_compute_load_microcode(adev); 4957 if (r) 4958 return r; 4959 } 4960 4961 r = gfx_v8_0_cp_gfx_resume(adev); 4962 if (r) 4963 return r; 4964 4965 r = gfx_v8_0_kiq_resume(adev); 4966 if (r) 4967 return r; 4968 4969 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 4970 4971 return 0; 4972 } 4973 4974 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 4975 { 4976 gfx_v8_0_cp_gfx_enable(adev, enable); 4977 gfx_v8_0_cp_compute_enable(adev, enable); 4978 } 4979 4980 static int gfx_v8_0_hw_init(void *handle) 4981 { 4982 int r; 4983 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4984 4985 gfx_v8_0_init_golden_registers(adev); 4986 gfx_v8_0_gpu_init(adev); 4987 4988 r = gfx_v8_0_rlc_resume(adev); 4989 if (r) 4990 return r; 4991 4992 r = gfx_v8_0_cp_resume(adev); 4993 4994 return r; 4995 } 4996 4997 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring) 4998 { 4999 struct amdgpu_device *adev = kiq_ring->adev; 5000 uint32_t scratch, tmp = 0; 5001 int r, i; 5002 5003 r = amdgpu_gfx_scratch_get(adev, &scratch); 5004 if (r) { 5005 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 5006 return r; 5007 } 5008 WREG32(scratch, 0xCAFEDEAD); 5009 5010 r = amdgpu_ring_alloc(kiq_ring, 10); 5011 if (r) { 5012 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 5013 amdgpu_gfx_scratch_free(adev, scratch); 5014 return r; 5015 } 5016 5017 /* unmap queues */ 5018 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 5019 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 5020 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 5021 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 5022 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 5023 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 5024 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 5025 amdgpu_ring_write(kiq_ring, 0); 5026 amdgpu_ring_write(kiq_ring, 0); 5027 amdgpu_ring_write(kiq_ring, 0); 5028 /* write to scratch for completion */ 5029 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 5030 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 5031 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 5032 amdgpu_ring_commit(kiq_ring); 5033 5034 for (i = 0; i < adev->usec_timeout; i++) { 5035 tmp = RREG32(scratch); 5036 if (tmp == 0xDEADBEEF) 5037 break; 5038 DRM_UDELAY(1); 5039 } 5040 if (i >= adev->usec_timeout) { 5041 DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); 5042 r = -EINVAL; 5043 } 5044 amdgpu_gfx_scratch_free(adev, scratch); 5045 return r; 5046 } 5047 5048 static int gfx_v8_0_hw_fini(void *handle) 5049 { 5050 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5051 int i; 5052 5053 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5054 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5055 5056 /* unmap the KCQs so the CPC stops touching memory that will no longer be valid */ 5057 for (i = 0; i < adev->gfx.num_compute_rings; i++) 5058 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); 5059 5060 if (amdgpu_sriov_vf(adev)) { 5061 pr_debug("For SRIOV client, nothing more to do.\n"); 5062 return 0; 5063 } 5064 gfx_v8_0_cp_enable(adev, false); 5065 gfx_v8_0_rlc_stop(adev); 5066 5067 amdgpu_device_ip_set_powergating_state(adev, 5068 AMD_IP_BLOCK_TYPE_GFX, 5069 AMD_PG_STATE_UNGATE); 5070 5071 return 0; 5072 }
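/* Rough sketch of how the suspend/resume hooks below tie into hw_fini/hw_init (not a literal call trace): suspend sets in_suspend = true and runs hw_fini() (unmap KCQs via the KIQ, halt CP and RLC); resume runs hw_init() (golden registers, RLC resume, CP/KIQ/KCQ resume) and then clears in_suspend. The in_suspend flag lets gfx_v8_0_kcq_init_queue() tell the cases apart: fresh init (!in_gpu_reset && !in_suspend) builds the MQD and backs it up, GPU reset (in_gpu_reset) restores the MQD from the backup, and resume (otherwise) keeps the saved MQD and just clears the ring. */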
5073 5074 static int gfx_v8_0_suspend(void *handle) 5075 { 5076 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5077 adev->gfx.in_suspend = true; 5078 return gfx_v8_0_hw_fini(adev); 5079 } 5080 5081 static int gfx_v8_0_resume(void *handle) 5082 { 5083 int r; 5084 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5085 5086 r = gfx_v8_0_hw_init(adev); 5087 adev->gfx.in_suspend = false; 5088 return r; 5089 } 5090 5091 static bool gfx_v8_0_is_idle(void *handle) 5092 { 5093 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5094 5095 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5096 return false; 5097 else 5098 return true; 5099 } 5100 5101 static int gfx_v8_0_wait_for_idle(void *handle) 5102 { 5103 unsigned i; 5104 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5105 5106 for (i = 0; i < adev->usec_timeout; i++) { 5107 if (gfx_v8_0_is_idle(handle)) 5108 return 0; 5109 5110 udelay(1); 5111 } 5112 return -ETIMEDOUT; 5113 } 5114 5115 static bool gfx_v8_0_check_soft_reset(void *handle) 5116 { 5117 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5118 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5119 u32 tmp; 5120 5121 /* GRBM_STATUS */ 5122 tmp = RREG32(mmGRBM_STATUS); 5123 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5124 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5125 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5126 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5127 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5128 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5129 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 5130 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5131 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 5132 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5133 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 5134 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5135 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5136 } 5137 5138 /* GRBM_STATUS2 */ 5139 tmp = RREG32(mmGRBM_STATUS2); 5140 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 5141 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5142 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 5143 5144 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 5145 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 5146 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 5147 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5148 SOFT_RESET_CPF, 1); 5149 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5150 SOFT_RESET_CPC, 1); 5151 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5152 SOFT_RESET_CPG, 1); 5153 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 5154 SOFT_RESET_GRBM, 1); 5155 } 5156 5157 /* SRBM_STATUS */ 5158 tmp = RREG32(mmSRBM_STATUS); 5159 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 5160 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5161 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5162 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 5163 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5164 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5165 5166 if (grbm_soft_reset || srbm_soft_reset) { 5167 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5168 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5169 return true; 5170 } else { 5171 adev->gfx.grbm_soft_reset = 0; 5172 adev->gfx.srbm_soft_reset = 0; 5173 return false; 5174 } 5175 } 5176 5177 static int gfx_v8_0_pre_soft_reset(void *handle) 5178 { 5179 struct amdgpu_device *adev = (struct 
amdgpu_device *)handle; 5180 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5181 5182 if ((!adev->gfx.grbm_soft_reset) && 5183 (!adev->gfx.srbm_soft_reset)) 5184 return 0; 5185 5186 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5187 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5188 5189 /* stop the rlc */ 5190 gfx_v8_0_rlc_stop(adev); 5191 5192 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5193 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5194 /* Disable GFX parsing/prefetching */ 5195 gfx_v8_0_cp_gfx_enable(adev, false); 5196 5197 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5198 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5199 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5200 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5201 int i; 5202 5203 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5204 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5205 5206 mutex_lock(&adev->srbm_mutex); 5207 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5208 gfx_v8_0_deactivate_hqd(adev, 2); 5209 vi_srbm_select(adev, 0, 0, 0, 0); 5210 mutex_unlock(&adev->srbm_mutex); 5211 } 5212 /* Disable MEC parsing/prefetching */ 5213 gfx_v8_0_cp_compute_enable(adev, false); 5214 } 5215 5216 return 0; 5217 } 5218 5219 static int gfx_v8_0_soft_reset(void *handle) 5220 { 5221 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5222 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5223 u32 tmp; 5224 5225 if ((!adev->gfx.grbm_soft_reset) && 5226 (!adev->gfx.srbm_soft_reset)) 5227 return 0; 5228 5229 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5230 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5231 5232 if (grbm_soft_reset || srbm_soft_reset) { 5233 tmp = RREG32(mmGMCON_DEBUG); 5234 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5235 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5236 WREG32(mmGMCON_DEBUG, tmp); 5237 udelay(50); 5238 } 5239 5240 if (grbm_soft_reset) { 5241 tmp = RREG32(mmGRBM_SOFT_RESET); 5242 tmp |= grbm_soft_reset; 5243 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5244 WREG32(mmGRBM_SOFT_RESET, tmp); 5245 tmp = RREG32(mmGRBM_SOFT_RESET); 5246 5247 udelay(50); 5248 5249 tmp &= ~grbm_soft_reset; 5250 WREG32(mmGRBM_SOFT_RESET, tmp); 5251 tmp = RREG32(mmGRBM_SOFT_RESET); 5252 } 5253 5254 if (srbm_soft_reset) { 5255 tmp = RREG32(mmSRBM_SOFT_RESET); 5256 tmp |= srbm_soft_reset; 5257 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5258 WREG32(mmSRBM_SOFT_RESET, tmp); 5259 tmp = RREG32(mmSRBM_SOFT_RESET); 5260 5261 udelay(50); 5262 5263 tmp &= ~srbm_soft_reset; 5264 WREG32(mmSRBM_SOFT_RESET, tmp); 5265 tmp = RREG32(mmSRBM_SOFT_RESET); 5266 } 5267 5268 if (grbm_soft_reset || srbm_soft_reset) { 5269 tmp = RREG32(mmGMCON_DEBUG); 5270 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5271 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5272 WREG32(mmGMCON_DEBUG, tmp); 5273 } 5274 5275 /* Wait a little for things to settle down */ 5276 udelay(50); 5277 5278 return 0; 5279 } 5280 5281 static int gfx_v8_0_post_soft_reset(void *handle) 5282 { 5283 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5284 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5285 5286 if ((!adev->gfx.grbm_soft_reset) && 5287 (!adev->gfx.srbm_soft_reset)) 5288 return 0; 5289 5290 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5291 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5292 5293 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) 
|| 5294 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5295 gfx_v8_0_cp_gfx_resume(adev); 5296 5297 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5298 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5299 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5300 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5301 int i; 5302 5303 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5304 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5305 5306 mutex_lock(&adev->srbm_mutex); 5307 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5308 gfx_v8_0_deactivate_hqd(adev, 2); 5309 vi_srbm_select(adev, 0, 0, 0, 0); 5310 mutex_unlock(&adev->srbm_mutex); 5311 } 5312 gfx_v8_0_kiq_resume(adev); 5313 } 5314 gfx_v8_0_rlc_start(adev); 5315 5316 return 0; 5317 } 5318 5319 /** 5320 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5321 * 5322 * @adev: amdgpu_device pointer 5323 * 5324 * Fetches a GPU clock counter snapshot. 5325 * Returns the 64 bit clock counter snapshot. 5326 */ 5327 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5328 { 5329 uint64_t clock; 5330 5331 mutex_lock(&adev->gfx.gpu_clock_mutex); 5332 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5333 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5334 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5335 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5336 return clock; 5337 } 5338 5339 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5340 uint32_t vmid, 5341 uint32_t gds_base, uint32_t gds_size, 5342 uint32_t gws_base, uint32_t gws_size, 5343 uint32_t oa_base, uint32_t oa_size) 5344 { 5345 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 5346 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 5347 5348 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 5349 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 5350 5351 oa_base = oa_base >> AMDGPU_OA_SHIFT; 5352 oa_size = oa_size >> AMDGPU_OA_SHIFT; 5353 5354 /* GDS Base */ 5355 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5356 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5357 WRITE_DATA_DST_SEL(0))); 5358 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5359 amdgpu_ring_write(ring, 0); 5360 amdgpu_ring_write(ring, gds_base); 5361 5362 /* GDS Size */ 5363 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5364 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5365 WRITE_DATA_DST_SEL(0))); 5366 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5367 amdgpu_ring_write(ring, 0); 5368 amdgpu_ring_write(ring, gds_size); 5369 5370 /* GWS */ 5371 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5372 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5373 WRITE_DATA_DST_SEL(0))); 5374 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5375 amdgpu_ring_write(ring, 0); 5376 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5377 5378 /* OA */ 5379 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5380 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5381 WRITE_DATA_DST_SEL(0))); 5382 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5383 amdgpu_ring_write(ring, 0); 5384 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5385 } 5386 5387 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5388 { 5389 WREG32(mmSQ_IND_INDEX, 5390 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5391 (simd << 
SQ_IND_INDEX__SIMD_ID__SHIFT) | 5392 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5393 (SQ_IND_INDEX__FORCE_READ_MASK)); 5394 return RREG32(mmSQ_IND_DATA); 5395 } 5396 5397 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5398 uint32_t wave, uint32_t thread, 5399 uint32_t regno, uint32_t num, uint32_t *out) 5400 { 5401 WREG32(mmSQ_IND_INDEX, 5402 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5403 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5404 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5405 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5406 (SQ_IND_INDEX__FORCE_READ_MASK) | 5407 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5408 while (num--) 5409 *(out++) = RREG32(mmSQ_IND_DATA); 5410 } 5411 5412 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5413 { 5414 /* type 0 wave data */ 5415 dst[(*no_fields)++] = 0; 5416 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5417 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5418 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 5419 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5420 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5421 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5422 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5423 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5424 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5425 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5426 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5427 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5428 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5429 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5430 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5431 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5432 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5433 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5434 } 5435 5436 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5437 uint32_t wave, uint32_t start, 5438 uint32_t size, uint32_t *dst) 5439 { 5440 wave_read_regs( 5441 adev, simd, wave, 0, 5442 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5443 } 5444 5445 5446 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5447 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5448 .select_se_sh = &gfx_v8_0_select_se_sh, 5449 .read_wave_data = &gfx_v8_0_read_wave_data, 5450 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5451 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q 5452 }; 5453 5454 static int gfx_v8_0_early_init(void *handle) 5455 { 5456 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5457 5458 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5459 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 5460 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5461 gfx_v8_0_set_ring_funcs(adev); 5462 gfx_v8_0_set_irq_funcs(adev); 5463 gfx_v8_0_set_gds_init(adev); 5464 gfx_v8_0_set_rlc_funcs(adev); 5465 5466 return 0; 5467 } 5468 5469 static int gfx_v8_0_late_init(void *handle) 5470 { 5471 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5472 int r; 5473 5474 r = amdgpu_irq_get(adev, 
&adev->gfx.priv_reg_irq, 0); 5475 if (r) 5476 return r; 5477 5478 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5479 if (r) 5480 return r; 5481 5482 /* requires IBs so do in late init after IB pool is initialized */ 5483 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5484 if (r) 5485 return r; 5486 5487 amdgpu_device_ip_set_powergating_state(adev, 5488 AMD_IP_BLOCK_TYPE_GFX, 5489 AMD_PG_STATE_GATE); 5490 5491 return 0; 5492 } 5493 5494 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5495 bool enable) 5496 { 5497 if ((adev->asic_type == CHIP_POLARIS11) || 5498 (adev->asic_type == CHIP_POLARIS12)) 5499 /* Send msg to SMU via Powerplay */ 5500 amdgpu_device_ip_set_powergating_state(adev, 5501 AMD_IP_BLOCK_TYPE_SMC, 5502 enable ? 5503 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); 5504 5505 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5506 } 5507 5508 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5509 bool enable) 5510 { 5511 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0); 5512 } 5513 5514 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5515 bool enable) 5516 { 5517 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5518 } 5519 5520 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5521 bool enable) 5522 { 5523 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5524 } 5525 5526 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5527 bool enable) 5528 { 5529 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5530 5531 /* Read any GFX register to wake up GFX. */ 5532 if (!enable) 5533 RREG32(mmDB_RENDER_CONTROL); 5534 } 5535 5536 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5537 bool enable) 5538 { 5539 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5540 cz_enable_gfx_cg_power_gating(adev, true); 5541 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5542 cz_enable_gfx_pipeline_power_gating(adev, true); 5543 } else { 5544 cz_enable_gfx_cg_power_gating(adev, false); 5545 cz_enable_gfx_pipeline_power_gating(adev, false); 5546 } 5547 } 5548 5549 static int gfx_v8_0_set_powergating_state(void *handle, 5550 enum amd_powergating_state state) 5551 { 5552 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5553 bool enable = (state == AMD_PG_STATE_GATE); 5554 5555 if (amdgpu_sriov_vf(adev)) 5556 return 0; 5557 5558 switch (adev->asic_type) { 5559 case CHIP_CARRIZO: 5560 case CHIP_STONEY: 5561 5562 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5563 cz_enable_sck_slow_down_on_power_up(adev, true); 5564 cz_enable_sck_slow_down_on_power_down(adev, true); 5565 } else { 5566 cz_enable_sck_slow_down_on_power_up(adev, false); 5567 cz_enable_sck_slow_down_on_power_down(adev, false); 5568 } 5569 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5570 cz_enable_cp_power_gating(adev, true); 5571 else 5572 cz_enable_cp_power_gating(adev, false); 5573 5574 cz_update_gfx_cg_power_gating(adev, enable); 5575 5576 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5577 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5578 else 5579 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5580 5581 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5582 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5583 else 5584 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5585 break; 5586 case CHIP_POLARIS11: 5587 case 
CHIP_POLARIS12: 5588 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5589 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5590 else 5591 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5592 5593 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5594 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5595 else 5596 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5597 5598 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) 5599 polaris11_enable_gfx_quick_mg_power_gating(adev, true); 5600 else 5601 polaris11_enable_gfx_quick_mg_power_gating(adev, false); 5602 break; 5603 default: 5604 break; 5605 } 5606 5607 return 0; 5608 } 5609 5610 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) 5611 { 5612 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5613 int data; 5614 5615 if (amdgpu_sriov_vf(adev)) 5616 *flags = 0; 5617 5618 /* AMD_CG_SUPPORT_GFX_MGCG */ 5619 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5620 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) 5621 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5622 5623 /* AMD_CG_SUPPORT_GFX_CGCG */ 5624 data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5625 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5626 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5627 5628 /* AMD_CG_SUPPORT_GFX_CGLS */ 5629 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5630 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5631 5632 /* AMD_CG_SUPPORT_GFX_CGTS */ 5633 data = RREG32(mmCGTS_SM_CTRL_REG); 5634 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK)) 5635 *flags |= AMD_CG_SUPPORT_GFX_CGTS; 5636 5637 /* AMD_CG_SUPPORT_GFX_CGTS_LS */ 5638 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK)) 5639 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS; 5640 5641 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5642 data = RREG32(mmRLC_MEM_SLP_CNTL); 5643 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5644 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5645 5646 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5647 data = RREG32(mmCP_MEM_SLP_CNTL); 5648 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5649 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5650 } 5651 5652 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5653 uint32_t reg_addr, uint32_t cmd) 5654 { 5655 uint32_t data; 5656 5657 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5658 5659 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5660 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5661 5662 data = RREG32(mmRLC_SERDES_WR_CTRL); 5663 if (adev->asic_type == CHIP_STONEY) 5664 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5665 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5666 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5667 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5668 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5669 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5670 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5671 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5672 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5673 else 5674 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5675 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5676 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5677 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5678 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5679 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5680 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5681 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5682 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5683 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5684 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5685 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5686 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5687
(reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5688 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5689 5690 WREG32(mmRLC_SERDES_WR_CTRL, data); 5691 } 5692 5693 #define MSG_ENTER_RLC_SAFE_MODE 1 5694 #define MSG_EXIT_RLC_SAFE_MODE 0 5695 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5696 #define RLC_GPR_REG2__REQ__SHIFT 0 5697 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5698 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5699 5700 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5701 { 5702 u32 data; 5703 unsigned i; 5704 5705 data = RREG32(mmRLC_CNTL); 5706 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5707 return; 5708 5709 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5710 data |= RLC_SAFE_MODE__CMD_MASK; 5711 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5712 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5713 WREG32(mmRLC_SAFE_MODE, data); 5714 5715 for (i = 0; i < adev->usec_timeout; i++) { 5716 if ((RREG32(mmRLC_GPM_STAT) & 5717 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5718 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5719 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5720 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5721 break; 5722 udelay(1); 5723 } 5724 5725 for (i = 0; i < adev->usec_timeout; i++) { 5726 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5727 break; 5728 udelay(1); 5729 } 5730 adev->gfx.rlc.in_safe_mode = true; 5731 } 5732 } 5733 5734 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5735 { 5736 u32 data = 0; 5737 unsigned i; 5738 5739 data = RREG32(mmRLC_CNTL); 5740 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5741 return; 5742 5743 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5744 if (adev->gfx.rlc.in_safe_mode) { 5745 data |= RLC_SAFE_MODE__CMD_MASK; 5746 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5747 WREG32(mmRLC_SAFE_MODE, data); 5748 adev->gfx.rlc.in_safe_mode = false; 5749 } 5750 } 5751 5752 for (i = 0; i < adev->usec_timeout; i++) { 5753 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5754 break; 5755 udelay(1); 5756 } 5757 } 5758 5759 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5760 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5761 .exit_safe_mode = iceland_exit_rlc_safe_mode 5762 }; 5763 5764 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5765 bool enable) 5766 { 5767 uint32_t temp, data; 5768 5769 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5770 5771 /* It is disabled by HW by default */ 5772 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5773 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5774 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 5775 /* 1 - RLC memory Light sleep */ 5776 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 5777 5778 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 5779 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 5780 } 5781 5782 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5783 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5784 if (adev->flags & AMD_IS_APU) 5785 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5786 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5787 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5788 else 5789 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5790 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5791 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5792 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5793 5794 if (temp != data) 5795 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5796 5797 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5798 gfx_v8_0_wait_for_rlc_serdes(adev); 5799 5800 /* 
5 - clear mgcg override */ 5801 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5802 5803 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5804 /* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */ 5805 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5806 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5807 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5808 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 5809 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 5810 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 5811 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 5812 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 5813 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 5814 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 5815 if (temp != data) 5816 WREG32(mmCGTS_SM_CTRL_REG, data); 5817 } 5818 udelay(50); 5819 5820 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5821 gfx_v8_0_wait_for_rlc_serdes(adev); 5822 } else { 5823 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 5824 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5825 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5826 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5827 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5828 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5829 if (temp != data) 5830 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5831 5832 /* 2 - disable MGLS in RLC */ 5833 data = RREG32(mmRLC_MEM_SLP_CNTL); 5834 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5835 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5836 WREG32(mmRLC_MEM_SLP_CNTL, data); 5837 } 5838 5839 /* 3 - disable MGLS in CP */ 5840 data = RREG32(mmCP_MEM_SLP_CNTL); 5841 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5842 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5843 WREG32(mmCP_MEM_SLP_CNTL, data); 5844 } 5845 5846 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 5847 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5848 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 5849 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 5850 if (temp != data) 5851 WREG32(mmCGTS_SM_CTRL_REG, data); 5852 5853 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5854 gfx_v8_0_wait_for_rlc_serdes(adev); 5855 5856 /* 6 - set mgcg override */ 5857 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5858 5859 udelay(50); 5860 5861 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5862 gfx_v8_0_wait_for_rlc_serdes(adev); 5863 } 5864 5865 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5866 } 5867 5868 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5869 bool enable) 5870 { 5871 uint32_t temp, temp1, data, data1; 5872 5873 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5874 5875 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5876 5877 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5878 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5879 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 5880 if (temp1 != data1) 5881 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5882 5883 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5884 gfx_v8_0_wait_for_rlc_serdes(adev); 5885 5886 /* 2 - clear cgcg override */ 5887 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5888 5889 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5890 gfx_v8_0_wait_for_rlc_serdes(adev); 5891 5892 /* 3 - write cmd to set CGLS */ 5893 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 5894 5895 /* 4 - enable cgcg */ 5896 data |=
RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5897 5898 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5899 /* enable cgls */ 5900 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5901 5902 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5903 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 5904 5905 if (temp1 != data1) 5906 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5907 } else { 5908 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5909 } 5910 5911 if (temp != data) 5912 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5913 5914 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/ 5915 * Cmp_busy/GFX_Idle interrupts 5916 */ 5917 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5918 } else { 5919 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 5920 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5921 5922 /* TEST CGCG */ 5923 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5924 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 5925 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 5926 if (temp1 != data1) 5927 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5928 5929 /* read gfx register to wake up cgcg */ 5930 RREG32(mmCB_CGTT_SCLK_CTRL); 5931 RREG32(mmCB_CGTT_SCLK_CTRL); 5932 RREG32(mmCB_CGTT_SCLK_CTRL); 5933 RREG32(mmCB_CGTT_SCLK_CTRL); 5934 5935 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5936 gfx_v8_0_wait_for_rlc_serdes(adev); 5937 5938 /* write cmd to Set CGCG Override */ 5939 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5940 5941 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5942 gfx_v8_0_wait_for_rlc_serdes(adev); 5943 5944 /* write cmd to Clear CGLS */ 5945 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 5946 5947 /* disable cgcg, cgls should be disabled too. */ 5948 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 5949 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5950 if (temp != data) 5951 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5952 /* enable interrupts again for PG */ 5953 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5954 } 5955 5956 gfx_v8_0_wait_for_rlc_serdes(adev); 5957 5958 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5959 } 5960 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5961 bool enable) 5962 { 5963 if (enable) { 5964 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 5965 * === MGCG + MGLS + TS(CG/LS) === 5966 */ 5967 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5968 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5969 } else { 5970 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 5971 * === CGCG + CGLS === 5972 */ 5973 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5974 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5975 } 5976 return 0; 5977 } 5978 5979 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 5980 enum amd_clockgating_state state) 5981 { 5982 uint32_t msg_id, pp_state = 0; 5983 uint32_t pp_support_state = 0; 5984 5985 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 5986 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5987 pp_support_state = PP_STATE_SUPPORT_LS; 5988 pp_state = PP_STATE_LS; 5989 } 5990 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5991 pp_support_state |= PP_STATE_SUPPORT_CG; 5992 pp_state |= PP_STATE_CG; 5993 } 5994 if (state == AMD_CG_STATE_UNGATE) 5995 pp_state = 0; 5996 5997 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5998 PP_BLOCK_GFX_CG, 5999 pp_support_state, 6000 pp_state); 6001 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6002
amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6003 } 6004 6005 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6006 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6007 pp_support_state = PP_STATE_SUPPORT_LS; 6008 pp_state = PP_STATE_LS; 6009 } 6010 6011 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6012 pp_support_state |= PP_STATE_SUPPORT_CG; 6013 pp_state |= PP_STATE_CG; 6014 } 6015 6016 if (state == AMD_CG_STATE_UNGATE) 6017 pp_state = 0; 6018 6019 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6020 PP_BLOCK_GFX_MG, 6021 pp_support_state, 6022 pp_state); 6023 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6024 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6025 } 6026 6027 return 0; 6028 } 6029 6030 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 6031 enum amd_clockgating_state state) 6032 { 6033 6034 uint32_t msg_id, pp_state = 0; 6035 uint32_t pp_support_state = 0; 6036 6037 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6038 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6039 pp_support_state = PP_STATE_SUPPORT_LS; 6040 pp_state = PP_STATE_LS; 6041 } 6042 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6043 pp_support_state |= PP_STATE_SUPPORT_CG; 6044 pp_state |= PP_STATE_CG; 6045 } 6046 if (state == AMD_CG_STATE_UNGATE) 6047 pp_state = 0; 6048 6049 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6050 PP_BLOCK_GFX_CG, 6051 pp_support_state, 6052 pp_state); 6053 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6054 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6055 } 6056 6057 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 6058 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 6059 pp_support_state = PP_STATE_SUPPORT_LS; 6060 pp_state = PP_STATE_LS; 6061 } 6062 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 6063 pp_support_state |= PP_STATE_SUPPORT_CG; 6064 pp_state |= PP_STATE_CG; 6065 } 6066 if (state == AMD_CG_STATE_UNGATE) 6067 pp_state = 0; 6068 6069 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6070 PP_BLOCK_GFX_3D, 6071 pp_support_state, 6072 pp_state); 6073 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6074 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6075 } 6076 6077 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6078 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6079 pp_support_state = PP_STATE_SUPPORT_LS; 6080 pp_state = PP_STATE_LS; 6081 } 6082 6083 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6084 pp_support_state |= PP_STATE_SUPPORT_CG; 6085 pp_state |= PP_STATE_CG; 6086 } 6087 6088 if (state == AMD_CG_STATE_UNGATE) 6089 pp_state = 0; 6090 6091 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6092 PP_BLOCK_GFX_MG, 6093 pp_support_state, 6094 pp_state); 6095 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6096 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6097 } 6098 6099 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 6100 pp_support_state = PP_STATE_SUPPORT_LS; 6101 6102 if (state == AMD_CG_STATE_UNGATE) 6103 pp_state = 0; 6104 else 6105 pp_state = PP_STATE_LS; 6106 6107 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6108 PP_BLOCK_GFX_RLC, 6109 pp_support_state, 6110 pp_state); 6111 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6112 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6113 } 6114 6115 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 6116 pp_support_state = PP_STATE_SUPPORT_LS; 6117 6118 if (state == AMD_CG_STATE_UNGATE) 6119 pp_state = 0; 6120 else 6121 pp_state = PP_STATE_LS; 6122 msg_id = 
PP_CG_MSG_ID(PP_GROUP_GFX, 6123 PP_BLOCK_GFX_CP, 6124 pp_support_state, 6125 pp_state); 6126 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6127 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6128 } 6129 6130 return 0; 6131 } 6132 6133 static int gfx_v8_0_set_clockgating_state(void *handle, 6134 enum amd_clockgating_state state) 6135 { 6136 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6137 6138 if (amdgpu_sriov_vf(adev)) 6139 return 0; 6140 6141 switch (adev->asic_type) { 6142 case CHIP_FIJI: 6143 case CHIP_CARRIZO: 6144 case CHIP_STONEY: 6145 gfx_v8_0_update_gfx_clock_gating(adev, 6146 state == AMD_CG_STATE_GATE); 6147 break; 6148 case CHIP_TONGA: 6149 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6150 break; 6151 case CHIP_POLARIS10: 6152 case CHIP_POLARIS11: 6153 case CHIP_POLARIS12: 6154 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6155 break; 6156 default: 6157 break; 6158 } 6159 return 0; 6160 } 6161 6162 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6163 { 6164 return ring->adev->wb.wb[ring->rptr_offs]; 6165 } 6166 6167 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6168 { 6169 struct amdgpu_device *adev = ring->adev; 6170 6171 if (ring->use_doorbell) 6172 /* XXX check if swapping is necessary on BE */ 6173 return ring->adev->wb.wb[ring->wptr_offs]; 6174 else 6175 return RREG32(mmCP_RB0_WPTR); 6176 } 6177 6178 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 6179 { 6180 struct amdgpu_device *adev = ring->adev; 6181 6182 if (ring->use_doorbell) { 6183 /* XXX check if swapping is necessary on BE */ 6184 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6185 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6186 } else { 6187 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 6188 (void)RREG32(mmCP_RB0_WPTR); 6189 } 6190 } 6191 6192 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 6193 { 6194 u32 ref_and_mask, reg_mem_engine; 6195 6196 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || 6197 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { 6198 switch (ring->me) { 6199 case 1: 6200 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6201 break; 6202 case 2: 6203 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6204 break; 6205 default: 6206 return; 6207 } 6208 reg_mem_engine = 0; 6209 } else { 6210 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6211 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6212 } 6213 6214 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6215 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6216 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6217 reg_mem_engine)); 6218 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 6219 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6220 amdgpu_ring_write(ring, ref_and_mask); 6221 amdgpu_ring_write(ring, ref_and_mask); 6222 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6223 } 6224 6225 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) 6226 { 6227 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6228 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) | 6229 EVENT_INDEX(4)); 6230 6231 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6232 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) | 6233 EVENT_INDEX(0)); 6234 } 6235 6236 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6237 struct amdgpu_ib *ib, 6238 unsigned vmid, bool ctx_switch) 6239 { 6240 u32 header, control = 0; 6241 6242 if (ib->flags & 
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vmid, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vmid, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
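
/*
 * Gfx fences are emitted as EVENT_WRITE_EOP: the CP flushes the TC/TCL1
 * caches at end-of-pipe and then writes the fence value to "addr".
 * DATA_SEL selects a 32-bit (1) or 64-bit (2) data write and INT_SEL(2)
 * additionally raises an interrupt, matching the
 * AMDGPU_FENCE_FLAG_64BIT / AMDGPU_FENCE_FLAG_INT flags decoded below.
 */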
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
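
/*
 * Compute queue priority is enforced by throttling pipes through the
 * SPI_WCL_PIPE_PERCENT_* registers, which sit at consecutive offsets
 * starting at mmSPI_WCL_PIPE_PERCENT_GFX.  The first ME contributes
 * only two entries (GFX and HP3D), hence the "pipe_num -= 2" for later
 * MEs.  Acquiring writes the full VALUE mask; releasing drops the
 * field to 1.
 */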
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}

static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
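
/*
 * CP_HQD_* registers are banked per queue: vi_srbm_select() routes the
 * two priority writes below to the ring's (me, pipe, queue) instance
 * and the selection is restored to 0 afterwards, all under srbm_mutex.
 * 0x2/0xf are the elevated pipe/queue priority values this code uses
 * when a ring acquires high priority.
 */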
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
					       enum drm_sched_priority priority)
{
	struct amdgpu_device *adev = ring->adev;
	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;

	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return;

	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
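
/*
 * The second dword of PACKET3_CONTEXT_CONTROL (dw2 below) is a
 * load-control bitmask; without bit 31 (load_enable) the packet is
 * effectively a NOP.  The bits set below are: 0x8001 for the global
 * (u)config state, 0x01000000 for CS shader regs, 0x10002 for
 * per-context state and gfx shader regs, and 0x10000000 to reload CE
 * RAM when a preamble IB is present.
 */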
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented,
		 * even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = 1 << 16; /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
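
/*
 * Compute EOP interrupts are enabled per pipe through the
 * CP_ME1_PIPEn_INT_CNTL registers.  The CP_INT_CNTL_RING0
 * TIME_STAMP_INT_ENABLE mask is reused below, which assumes the field
 * sits at the same bit position in the per-pipe registers.
 */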
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
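
/*
 * CP interrupt vectors encode their source in entry->ring_id:
 * bits [1:0] = pipe, bits [3:2] = ME, bits [6:4] = queue.  For example,
 * ring_id 0x25 (0b0100101) decodes to me 1, pipe 1, queue 2.  me 0 is
 * the gfx ring; MEC interrupts are matched against the compute rings.
 */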
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* KIQ only supports GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
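
/*
 * .emit_frame_size in the ring funcs below is a worst-case dword
 * budget: the sum of the largest packet sequence each emit_* callback
 * can write for a single frame, annotated entry by entry.  For the gfx
 * ring this comes to at most 215 dwords when 16 IBs are counted in,
 * per the comment on the field.
 */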
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215 dwords if 16 IBs are counted in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
		      * the first COND_EXEC jumps to the place just
		      * prior to this double SWITCH_BUFFER
		      */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
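
/*
 * The KIQ ring reuses the compute rptr/wptr and IB emit paths but has
 * its own fence emitter (WRITE_DATA based, since KIQ fences are 32-bit)
 * and additionally exposes emit_rreg, allowing registers to be read
 * back through the CP under SR-IOV.
 */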
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}
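
/*
 * A CU may be inactive either because it is fused off
 * (CC_GC_SHADER_ARRAY_CONFIG) or because it was disabled via the user
 * bitmap written above (GC_USER_SHADER_ARRAY_CONFIG).  The active mask
 * is therefore the complement of the OR of both INACTIVE_CUS fields,
 * clipped to max_cu_per_sh.
 */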
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
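
/*
 * For SR-IOV mid-command-buffer preemption, the CE/DE metadata is kept
 * in the per-context save area (CSA): the CE payload was written by
 * gfx_v8_0_ring_emit_ce_meta() above, and the DE payload below also
 * records a GDS backup address placed 4 KiB past the CSA base.  The
 * chained-IB variants of the structs are used when
 * adev->virt.chained_ib_support is set.
 */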
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}