/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)		((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)		((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)		((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)		((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)		((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)		((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)		((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM register addresses */
enum {
	BPM_REG_CGLS_EN = 0,		/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,		/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,		/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,		/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,		/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

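/*
 * The golden register tables below are consumed by
 * amdgpu_device_program_register_sequence() as {offset, and-mask, value}
 * triplets: the bits covered by the mask are replaced with the
 * corresponding bits of the value (a mask of 0xffffffff effectively
 * writes the value verbatim).
 */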
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
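	/* Build a minimal 3-dword IB that writes 0xDEADBEEF to the scratch
	 * register via SET_UCONFIG_REG; reading the value back once the
	 * fence signals confirms that IB execution works.
	 */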
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released in feature version #46
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
		le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);

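	/* One allocation holds the register list format array and, right
	 * after it, the register restore list; register_restore is pointed
	 * into the same buffer below.
	 */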
	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we need to account for the JT (jump table) as well */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

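		/* clear_state_size is in dwords, so the BO is allocated
		 * with dws * 4 bytes and kept reserved and mapped until
		 * the CSB image has been written below.
		 */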
		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

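/* Register/value pairs, consumed two at a time by
 * gfx_v8_0_do_edc_gpr_workarounds() below, that set up the compute
 * dispatch state for the GPR-init shaders above.
 */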
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};

static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

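	/* Per dispatch: (regs / 2) SET_SH_REG writes of 3 dwords each, plus
	 * 4 dwords for the shader address, 5 for DISPATCH_DIRECT and 2 for
	 * the CS partial flush event; the final * 4 converts dwords to bytes.
	 */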
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
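
	/*
	 * The SGPR test is split into two dispatches: sgpr1_init_regs and
	 * sgpr2_init_regs program COMPUTE_STATIC_THREAD_MGMT_SE0 to 0x0f
	 * and 0xf0 respectively, which presumably steers each dispatch at a
	 * different half of the CUs so that all SGPR banks get written.
	 */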
	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
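
/*
 * Per-ASIC gfx configuration: shader engine/pipe/CU limits are hardcoded
 * below, except on Polaris where they are read from the vbios via
 * amdgpu_atombios_get_gfx_info(); the golden GB_ADDR_CONFIG value is then
 * patched with the probed memory row size at the end of the function.
 */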
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If the DIMM address map is 8GB, the ROW size should be 2KB, otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), the ROW size should be the larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
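		/*
		 * Row size = number of columns (2 ^ (8 + NOOFCOLS)) times
		 * 4 bytes per column, converted to KB; GB_ADDR_CONFIG can
		 * only encode 1, 2 or 4 KB, hence the clamp below.
		 */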
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}

static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				       int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			     + (ring_id * GFX8_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}
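
/*
 * Software init: hooks up the KIQ, EOP and privileged-op interrupt
 * sources, loads the gfx firmware, allocates the RLC and MEC objects,
 * then creates the gfx ring, the compute rings, the KIQ ring and the
 * MQD backing store, and reserves the GDS/GWS/OA partitions.
 */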
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}

static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				      &adev->gfx.rlc.cp_table_gpu_addr,
				      (void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
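
/*
 * Fills GB_TILE_MODE0-31 and GB_MACROTILE_MODE0-15 with per-ASIC tiling
 * layouts; the values are kept in adev->gfx.config.tile_mode_array and
 * macrotile_mode_array as well as being written to the registers.
 */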
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
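
		/* tile modes 7, 12, 17 and 23 are skipped in the write loop
		 * below; on the larger parts these hold the P4_16x16 PRT
		 * variants, which presumably do not apply to this two-pipe
		 * config */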
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
(ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2951 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2952 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2953 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2954 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2955 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2956 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2957 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2958 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2959 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2960 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2961 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2962 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2963 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2964 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2965 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2966 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2967 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2968 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2969 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2970 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2971 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2972 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2973 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2974 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2975 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2976 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2977 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2978 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2979 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2980 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2981 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2982 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2983 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2984 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2985 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2986 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2987 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2988 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2989 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2990 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2991 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2992 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2993 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2994 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2995 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2996 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2997 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2998 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2999 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3000 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3001 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3002 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3003 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3004 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3005 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3006 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3007 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3008 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3009 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3010 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3011 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3012 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3013 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3014 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3015 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3016 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3017 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3018 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3019 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 
3020 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3021 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3022 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3023 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3024 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3025 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3026 3027 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3028 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3029 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3030 NUM_BANKS(ADDR_SURF_16_BANK)); 3031 3032 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3033 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3034 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3035 NUM_BANKS(ADDR_SURF_16_BANK)); 3036 3037 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3038 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3039 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3040 NUM_BANKS(ADDR_SURF_16_BANK)); 3041 3042 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3043 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3044 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3045 NUM_BANKS(ADDR_SURF_16_BANK)); 3046 3047 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3048 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3049 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3050 NUM_BANKS(ADDR_SURF_16_BANK)); 3051 3052 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3053 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3054 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3055 NUM_BANKS(ADDR_SURF_16_BANK)); 3056 3057 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3058 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3059 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3060 NUM_BANKS(ADDR_SURF_16_BANK)); 3061 3062 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3063 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3064 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3065 NUM_BANKS(ADDR_SURF_16_BANK)); 3066 3067 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3068 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3069 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3070 NUM_BANKS(ADDR_SURF_16_BANK)); 3071 3072 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3073 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3074 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3075 NUM_BANKS(ADDR_SURF_16_BANK)); 3076 3077 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3078 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3079 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3080 NUM_BANKS(ADDR_SURF_16_BANK)); 3081 3082 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3083 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3084 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3085 NUM_BANKS(ADDR_SURF_8_BANK)); 3086 3087 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3088 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3089 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3090 NUM_BANKS(ADDR_SURF_4_BANK)); 3091 3092 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3093 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3094 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3095 NUM_BANKS(ADDR_SURF_4_BANK)); 3096 3097 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3098 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3099 3100 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3101 if (reg_offset != 7) 3102 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3103 3104 break; 3105 case CHIP_STONEY: 3106 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3107 PIPE_CONFIG(ADDR_SURF_P2) | 3108 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3109 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3110 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3111 PIPE_CONFIG(ADDR_SURF_P2) | 3112 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3113 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3114 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3115 PIPE_CONFIG(ADDR_SURF_P2) | 3116 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3117 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3118 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3119 PIPE_CONFIG(ADDR_SURF_P2) | 3120 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3121 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3122 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3123 PIPE_CONFIG(ADDR_SURF_P2) | 3124 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3125 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3126 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3127 PIPE_CONFIG(ADDR_SURF_P2) | 3128 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3129 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3130 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3131 PIPE_CONFIG(ADDR_SURF_P2) | 3132 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3133 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3134 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3135 PIPE_CONFIG(ADDR_SURF_P2)); 3136 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3137 PIPE_CONFIG(ADDR_SURF_P2) | 3138 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3139 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3140 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3141 PIPE_CONFIG(ADDR_SURF_P2) | 3142 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3143 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3144 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3145 PIPE_CONFIG(ADDR_SURF_P2) | 3146 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3147 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3148 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3149 PIPE_CONFIG(ADDR_SURF_P2) | 3150 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3151 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3152 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3153 PIPE_CONFIG(ADDR_SURF_P2) | 3154 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3155 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3156 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3157 PIPE_CONFIG(ADDR_SURF_P2) | 3158 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3159 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3160 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3161 PIPE_CONFIG(ADDR_SURF_P2) | 3162 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3163 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3164 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3165 PIPE_CONFIG(ADDR_SURF_P2) | 3166 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3167 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3168 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3169 PIPE_CONFIG(ADDR_SURF_P2) | 3170 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3171 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3172 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3173 PIPE_CONFIG(ADDR_SURF_P2) | 3174 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3175 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3176 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3177 PIPE_CONFIG(ADDR_SURF_P2) | 3178 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3179 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3180 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3181 PIPE_CONFIG(ADDR_SURF_P2) | 3182 
MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3184 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3185 PIPE_CONFIG(ADDR_SURF_P2) | 3186 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3187 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3188 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3189 PIPE_CONFIG(ADDR_SURF_P2) | 3190 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3191 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3192 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3193 PIPE_CONFIG(ADDR_SURF_P2) | 3194 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3195 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3196 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3197 PIPE_CONFIG(ADDR_SURF_P2) | 3198 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3199 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3200 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3201 PIPE_CONFIG(ADDR_SURF_P2) | 3202 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3203 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3204 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3205 PIPE_CONFIG(ADDR_SURF_P2) | 3206 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3207 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3208 3209 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3210 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3211 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3212 NUM_BANKS(ADDR_SURF_8_BANK)); 3213 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3214 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3215 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3216 NUM_BANKS(ADDR_SURF_8_BANK)); 3217 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3218 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3219 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3220 NUM_BANKS(ADDR_SURF_8_BANK)); 3221 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3222 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3223 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3224 NUM_BANKS(ADDR_SURF_8_BANK)); 3225 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3226 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3227 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3228 NUM_BANKS(ADDR_SURF_8_BANK)); 3229 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3230 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3231 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3232 NUM_BANKS(ADDR_SURF_8_BANK)); 3233 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3234 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3235 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3236 NUM_BANKS(ADDR_SURF_8_BANK)); 3237 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3238 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3239 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3240 NUM_BANKS(ADDR_SURF_16_BANK)); 3241 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3242 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3243 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3244 NUM_BANKS(ADDR_SURF_16_BANK)); 3245 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3246 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3247 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3248 NUM_BANKS(ADDR_SURF_16_BANK)); 3249 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3250 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3251 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3252 NUM_BANKS(ADDR_SURF_16_BANK)); 3253 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3254 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3255 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3256 NUM_BANKS(ADDR_SURF_16_BANK)); 3257 mod2array[13] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3258 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3259 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3260 NUM_BANKS(ADDR_SURF_16_BANK)); 3261 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3262 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3263 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3264 NUM_BANKS(ADDR_SURF_8_BANK)); 3265 3266 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3267 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3268 reg_offset != 23) 3269 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3270 3271 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3272 if (reg_offset != 7) 3273 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3274 3275 break; 3276 default: 3277 dev_warn(adev->dev, 3278 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3279 adev->asic_type); 3280 3281 case CHIP_CARRIZO: 3282 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3283 PIPE_CONFIG(ADDR_SURF_P2) | 3284 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3285 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3286 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3287 PIPE_CONFIG(ADDR_SURF_P2) | 3288 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3289 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3290 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3291 PIPE_CONFIG(ADDR_SURF_P2) | 3292 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3293 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3294 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3295 PIPE_CONFIG(ADDR_SURF_P2) | 3296 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3297 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3298 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3299 PIPE_CONFIG(ADDR_SURF_P2) | 3300 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3301 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3302 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3303 PIPE_CONFIG(ADDR_SURF_P2) | 3304 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3305 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3306 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3307 PIPE_CONFIG(ADDR_SURF_P2) | 3308 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3309 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3310 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3311 PIPE_CONFIG(ADDR_SURF_P2)); 3312 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3313 PIPE_CONFIG(ADDR_SURF_P2) | 3314 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3315 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3316 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3317 PIPE_CONFIG(ADDR_SURF_P2) | 3318 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3320 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3321 PIPE_CONFIG(ADDR_SURF_P2) | 3322 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3323 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3324 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3325 PIPE_CONFIG(ADDR_SURF_P2) | 3326 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3327 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3328 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3329 PIPE_CONFIG(ADDR_SURF_P2) | 3330 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3332 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3333 PIPE_CONFIG(ADDR_SURF_P2) | 3334 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3335 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3336 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3337 PIPE_CONFIG(ADDR_SURF_P2) | 3338 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3339 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3340 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3341 PIPE_CONFIG(ADDR_SURF_P2) | 3342 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3344 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3345 PIPE_CONFIG(ADDR_SURF_P2) | 3346 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3347 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3348 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3349 PIPE_CONFIG(ADDR_SURF_P2) | 3350 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3351 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3352 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3353 PIPE_CONFIG(ADDR_SURF_P2) | 3354 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3355 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3356 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3357 PIPE_CONFIG(ADDR_SURF_P2) | 3358 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3359 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3360 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3361 PIPE_CONFIG(ADDR_SURF_P2) | 3362 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3364 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3365 PIPE_CONFIG(ADDR_SURF_P2) | 3366 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3368 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3369 PIPE_CONFIG(ADDR_SURF_P2) | 3370 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3371 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3372 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3373 PIPE_CONFIG(ADDR_SURF_P2) | 3374 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3375 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3376 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3377 PIPE_CONFIG(ADDR_SURF_P2) | 3378 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3380 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3381 PIPE_CONFIG(ADDR_SURF_P2) | 3382 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3383 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3384 3385 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3386 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3387 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3388 NUM_BANKS(ADDR_SURF_8_BANK)); 3389 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3390 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3391 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3392 NUM_BANKS(ADDR_SURF_8_BANK)); 3393 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3394 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3395 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3396 NUM_BANKS(ADDR_SURF_8_BANK)); 3397 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3398 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3399 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3400 NUM_BANKS(ADDR_SURF_8_BANK)); 3401 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3402 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3403 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3404 NUM_BANKS(ADDR_SURF_8_BANK)); 3405 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3406 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3407 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3408 NUM_BANKS(ADDR_SURF_8_BANK)); 3409 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) 
| 3410 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3411 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3412 NUM_BANKS(ADDR_SURF_8_BANK)); 3413 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3414 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3415 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3416 NUM_BANKS(ADDR_SURF_16_BANK)); 3417 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3418 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3419 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3420 NUM_BANKS(ADDR_SURF_16_BANK)); 3421 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3422 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3423 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3424 NUM_BANKS(ADDR_SURF_16_BANK)); 3425 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3426 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3427 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3428 NUM_BANKS(ADDR_SURF_16_BANK)); 3429 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3430 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3431 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3432 NUM_BANKS(ADDR_SURF_16_BANK)); 3433 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3434 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3435 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3436 NUM_BANKS(ADDR_SURF_16_BANK)); 3437 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3438 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3439 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3440 NUM_BANKS(ADDR_SURF_8_BANK)); 3441 3442 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3443 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3444 reg_offset != 23) 3445 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3446 3447 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3448 if (reg_offset != 7) 3449 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3450 3451 break; 3452 } 3453 } 3454 3455 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, 3456 u32 se_num, u32 sh_num, u32 instance) 3457 { 3458 u32 data; 3459 3460 if (instance == 0xffffffff) 3461 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3462 else 3463 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 3464 3465 if (se_num == 0xffffffff) 3466 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3467 else 3468 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3469 3470 if (sh_num == 0xffffffff) 3471 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3472 else 3473 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3474 3475 WREG32(mmGRBM_GFX_INDEX, data); 3476 } 3477 3478 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3479 { 3480 u32 data, mask; 3481 3482 data = RREG32(mmCC_RB_BACKEND_DISABLE) | 3483 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3484 3485 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); 3486 3487 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 3488 adev->gfx.config.max_sh_per_se); 3489 3490 return (~data) & mask; 3491 } 3492 3493 static void 3494 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) 3495 { 3496 switch (adev->asic_type) { 3497 case CHIP_FIJI: 3498 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3499 RB_XSEL2(1) | PKR_MAP(2) | 3500 PKR_XSEL(1) | PKR_YSEL(1) | 3501 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); 3502 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | 3503 SE_PAIR_YSEL(2); 3504 break; 3505 case CHIP_TONGA: 3506 case 
CHIP_POLARIS10: 3507 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3508 SE_XSEL(1) | SE_YSEL(1); 3509 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | 3510 SE_PAIR_YSEL(2); 3511 break; 3512 case CHIP_TOPAZ: 3513 case CHIP_CARRIZO: 3514 *rconf |= RB_MAP_PKR0(2); 3515 *rconf1 |= 0x0; 3516 break; 3517 case CHIP_POLARIS11: 3518 case CHIP_POLARIS12: 3519 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3520 SE_XSEL(1) | SE_YSEL(1); 3521 *rconf1 |= 0x0; 3522 break; 3523 case CHIP_STONEY: 3524 *rconf |= 0x0; 3525 *rconf1 |= 0x0; 3526 break; 3527 default: 3528 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 3529 break; 3530 } 3531 } 3532 3533 static void 3534 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, 3535 u32 raster_config, u32 raster_config_1, 3536 unsigned rb_mask, unsigned num_rb) 3537 { 3538 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); 3539 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); 3540 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); 3541 unsigned rb_per_se = num_rb / num_se; 3542 unsigned se_mask[4]; 3543 unsigned se; 3544 3545 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3546 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3547 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 3548 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3549 3550 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); 3551 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); 3552 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); 3553 3554 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3555 (!se_mask[2] && !se_mask[3]))) { 3556 raster_config_1 &= ~SE_PAIR_MAP_MASK; 3557 3558 if (!se_mask[0] && !se_mask[1]) { 3559 raster_config_1 |= 3560 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); 3561 } else { 3562 raster_config_1 |= 3563 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); 3564 } 3565 } 3566 3567 for (se = 0; se < num_se; se++) { 3568 unsigned raster_config_se = raster_config; 3569 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3570 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3571 int idx = (se / 2) * 2; 3572 3573 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3574 raster_config_se &= ~SE_MAP_MASK; 3575 3576 if (!se_mask[idx]) { 3577 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); 3578 } else { 3579 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); 3580 } 3581 } 3582 3583 pkr0_mask &= rb_mask; 3584 pkr1_mask &= rb_mask; 3585 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3586 raster_config_se &= ~PKR_MAP_MASK; 3587 3588 if (!pkr0_mask) { 3589 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); 3590 } else { 3591 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); 3592 } 3593 } 3594 3595 if (rb_per_se >= 2) { 3596 unsigned rb0_mask = 1 << (se * rb_per_se); 3597 unsigned rb1_mask = rb0_mask << 1; 3598 3599 rb0_mask &= rb_mask; 3600 rb1_mask &= rb_mask; 3601 if (!rb0_mask || !rb1_mask) { 3602 raster_config_se &= ~RB_MAP_PKR0_MASK; 3603 3604 if (!rb0_mask) { 3605 raster_config_se |= 3606 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); 3607 } else { 3608 raster_config_se |= 3609 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); 3610 } 3611 } 3612 3613 if (rb_per_se > 2) { 3614 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3615 rb1_mask = rb0_mask << 1; 3616 rb0_mask &= rb_mask; 3617 rb1_mask &= rb_mask; 3618 if (!rb0_mask || !rb1_mask) { 3619 raster_config_se &= ~RB_MAP_PKR1_MASK; 3620 3621 if (!rb0_mask) { 3622 raster_config_se |= 3623 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); 3624 } else { 
raster_config_se |=
						RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}

static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
	    adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

/**
 * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	default:
		adev->gfx.config.double_offchip_lds_buf = 1;
		break;
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		adev->gfx.config.double_offchip_lds_buf = 0;
		break;
	}
}

static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
					  SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcast
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
		PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
		PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
		PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
		PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
	       RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
	       RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
	       RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}

static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
	       adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
	       adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
	       adev->gfx.rlc.clear_state_size);
}

static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}

static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
	       adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
	       adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
		       indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}

static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}

static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
}

static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}

static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}
}

static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* on APUs such as Carrizo, the CP interrupt is only enabled after the CP is initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}

static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
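	/*
	 * Dump the firmware header, then stream the RLC ucode image into the
	 * GPM: reset mmRLC_GPM_UCODE_ADDR to 0, write the image one dword at
	 * a time through mmRLC_GPM_UCODE_DATA, and finally leave the firmware
	 * version in the address register.
	 */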
amdgpu_ucode_print_rlc_hdr(&hdr->header); 4113 4114 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 4115 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 4116 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 4117 4118 WREG32(mmRLC_GPM_UCODE_ADDR, 0); 4119 for (i = 0; i < fw_size; i++) 4120 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 4121 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 4122 4123 return 0; 4124 } 4125 4126 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) 4127 { 4128 int r; 4129 u32 tmp; 4130 4131 gfx_v8_0_rlc_stop(adev); 4132 4133 /* disable CG */ 4134 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL); 4135 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 4136 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4137 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); 4138 if (adev->asic_type == CHIP_POLARIS11 || 4139 adev->asic_type == CHIP_POLARIS10 || 4140 adev->asic_type == CHIP_POLARIS12) { 4141 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D); 4142 tmp &= ~0x3; 4143 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp); 4144 } 4145 4146 /* disable PG */ 4147 WREG32(mmRLC_PG_CNTL, 0); 4148 4149 gfx_v8_0_rlc_reset(adev); 4150 gfx_v8_0_init_pg(adev); 4151 4152 4153 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4154 /* legacy rlc firmware loading */ 4155 r = gfx_v8_0_rlc_load_microcode(adev); 4156 if (r) 4157 return r; 4158 } 4159 4160 gfx_v8_0_rlc_start(adev); 4161 4162 return 0; 4163 } 4164 4165 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 4166 { 4167 int i; 4168 u32 tmp = RREG32(mmCP_ME_CNTL); 4169 4170 if (enable) { 4171 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0); 4172 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0); 4173 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0); 4174 } else { 4175 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); 4176 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); 4177 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); 4178 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 4179 adev->gfx.gfx_ring[i].ready = false; 4180 } 4181 WREG32(mmCP_ME_CNTL, tmp); 4182 udelay(50); 4183 } 4184 4185 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 4186 { 4187 const struct gfx_firmware_header_v1_0 *pfp_hdr; 4188 const struct gfx_firmware_header_v1_0 *ce_hdr; 4189 const struct gfx_firmware_header_v1_0 *me_hdr; 4190 const __le32 *fw_data; 4191 unsigned i, fw_size; 4192 4193 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 4194 return -EINVAL; 4195 4196 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 4197 adev->gfx.pfp_fw->data; 4198 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 4199 adev->gfx.ce_fw->data; 4200 me_hdr = (const struct gfx_firmware_header_v1_0 *) 4201 adev->gfx.me_fw->data; 4202 4203 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 4204 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 4205 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 4206 4207 gfx_v8_0_cp_gfx_enable(adev, false); 4208 4209 /* PFP */ 4210 fw_data = (const __le32 *) 4211 (adev->gfx.pfp_fw->data + 4212 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 4213 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 4214 WREG32(mmCP_PFP_UCODE_ADDR, 0); 4215 for (i = 0; i < fw_size; i++) 4216 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 4217 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 4218 4219 /* CE */ 4220 fw_data = (const __le32 *) 4221 (adev->gfx.ce_fw->data + 4222 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 4223 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 4224 
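	/* same address-reset/stream/version sequence as the PFP block above */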
WREG32(mmCP_CE_UCODE_ADDR, 0); 4225 for (i = 0; i < fw_size; i++) 4226 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 4227 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 4228 4229 /* ME */ 4230 fw_data = (const __le32 *) 4231 (adev->gfx.me_fw->data + 4232 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 4233 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 4234 WREG32(mmCP_ME_RAM_WADDR, 0); 4235 for (i = 0; i < fw_size; i++) 4236 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 4237 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 4238 4239 return 0; 4240 } 4241 4242 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev) 4243 { 4244 u32 count = 0; 4245 const struct cs_section_def *sect = NULL; 4246 const struct cs_extent_def *ext = NULL; 4247 4248 /* begin clear state */ 4249 count += 2; 4250 /* context control state */ 4251 count += 3; 4252 4253 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4254 for (ext = sect->section; ext->extent != NULL; ++ext) { 4255 if (sect->id == SECT_CONTEXT) 4256 count += 2 + ext->reg_count; 4257 else 4258 return 0; 4259 } 4260 } 4261 /* pa_sc_raster_config/pa_sc_raster_config1 */ 4262 count += 4; 4263 /* end clear state */ 4264 count += 2; 4265 /* clear state */ 4266 count += 2; 4267 4268 return count; 4269 } 4270 4271 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) 4272 { 4273 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 4274 const struct cs_section_def *sect = NULL; 4275 const struct cs_extent_def *ext = NULL; 4276 int r, i; 4277 4278 /* init the CP */ 4279 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 4280 WREG32(mmCP_ENDIAN_SWAP, 0); 4281 WREG32(mmCP_DEVICE_ID, 1); 4282 4283 gfx_v8_0_cp_gfx_enable(adev, true); 4284 4285 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); 4286 if (r) { 4287 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 4288 return r; 4289 } 4290 4291 /* clear state buffer */ 4292 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4293 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 4294 4295 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4296 amdgpu_ring_write(ring, 0x80000000); 4297 amdgpu_ring_write(ring, 0x80000000); 4298 4299 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4300 for (ext = sect->section; ext->extent != NULL; ++ext) { 4301 if (sect->id == SECT_CONTEXT) { 4302 amdgpu_ring_write(ring, 4303 PACKET3(PACKET3_SET_CONTEXT_REG, 4304 ext->reg_count)); 4305 amdgpu_ring_write(ring, 4306 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 4307 for (i = 0; i < ext->reg_count; i++) 4308 amdgpu_ring_write(ring, ext->extent[i]); 4309 } 4310 } 4311 } 4312 4313 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 4314 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 4315 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config); 4316 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1); 4317 4318 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4319 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 4320 4321 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 4322 amdgpu_ring_write(ring, 0); 4323 4324 /* init the CE partitions */ 4325 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 4326 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 4327 amdgpu_ring_write(ring, 0x8000); 4328 amdgpu_ring_write(ring, 0x8000); 4329 4330 amdgpu_ring_commit(ring); 4331 4332 return 0; 4333 } 
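/*
 * Route the gfx ring's doorbell: program CP_RB_DOORBELL_CONTROL from the
 * ring's doorbell index when doorbells are in use, and on dGPUs also set
 * the doorbell range registers. Iceland (Topaz) has no gfx doorbells, so
 * it bails out early.
 */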
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;

	/* no gfx doorbells on Iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			    DOORBELL_RANGE_LOWER,
			    AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
	       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}

static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}

static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	udelay(50);
}

static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct
gfx_firmware_header_v1_0 *mec_hdr; 4444 const __le32 *fw_data; 4445 unsigned i, fw_size; 4446 4447 if (!adev->gfx.mec_fw) 4448 return -EINVAL; 4449 4450 gfx_v8_0_cp_compute_enable(adev, false); 4451 4452 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4453 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4454 4455 fw_data = (const __le32 *) 4456 (adev->gfx.mec_fw->data + 4457 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4458 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4459 4460 /* MEC1 */ 4461 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4462 for (i = 0; i < fw_size; i++) 4463 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4464 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4465 4466 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 4467 if (adev->gfx.mec2_fw) { 4468 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4469 4470 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4471 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4472 4473 fw_data = (const __le32 *) 4474 (adev->gfx.mec2_fw->data + 4475 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4476 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4477 4478 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4479 for (i = 0; i < fw_size; i++) 4480 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4481 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4482 } 4483 4484 return 0; 4485 } 4486 4487 /* KIQ functions */ 4488 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) 4489 { 4490 uint32_t tmp; 4491 struct amdgpu_device *adev = ring->adev; 4492 4493 /* tell RLC which is KIQ queue */ 4494 tmp = RREG32(mmRLC_CP_SCHEDULERS); 4495 tmp &= 0xffffff00; 4496 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4497 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4498 tmp |= 0x80; 4499 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4500 } 4501 4502 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) 4503 { 4504 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 4505 uint32_t scratch, tmp = 0; 4506 uint64_t queue_mask = 0; 4507 int r, i; 4508 4509 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 4510 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 4511 continue; 4512 4513 /* This situation may be hit in the future if a new HW 4514 * generation exposes more than 64 queues. 
If so, the 4515 * definition of queue_mask needs updating */ 4516 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 4517 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 4518 break; 4519 } 4520 4521 queue_mask |= (1ull << i); 4522 } 4523 4524 r = amdgpu_gfx_scratch_get(adev, &scratch); 4525 if (r) { 4526 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 4527 return r; 4528 } 4529 WREG32(scratch, 0xCAFEDEAD); 4530 4531 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); 4532 if (r) { 4533 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 4534 amdgpu_gfx_scratch_free(adev, scratch); 4535 return r; 4536 } 4537 /* set resources */ 4538 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 4539 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ 4540 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 4541 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 4542 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 4543 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 4544 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 4545 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 4546 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4547 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4548 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 4549 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4550 4551 /* map queues */ 4552 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 4553 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 4554 amdgpu_ring_write(kiq_ring, 4555 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); 4556 amdgpu_ring_write(kiq_ring, 4557 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | 4558 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 4559 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 4560 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 
0 : 1)); /* doorbell */ 4561 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 4562 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 4563 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 4564 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4565 } 4566 /* write to scratch for completion */ 4567 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 4568 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 4569 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 4570 amdgpu_ring_commit(kiq_ring); 4571 4572 for (i = 0; i < adev->usec_timeout; i++) { 4573 tmp = RREG32(scratch); 4574 if (tmp == 0xDEADBEEF) 4575 break; 4576 DRM_UDELAY(1); 4577 } 4578 if (i >= adev->usec_timeout) { 4579 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", 4580 scratch, tmp); 4581 r = -EINVAL; 4582 } 4583 amdgpu_gfx_scratch_free(adev, scratch); 4584 4585 return r; 4586 } 4587 4588 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) 4589 { 4590 int i, r = 0; 4591 4592 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 4593 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); 4594 for (i = 0; i < adev->usec_timeout; i++) { 4595 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 4596 break; 4597 udelay(1); 4598 } 4599 if (i == adev->usec_timeout) 4600 r = -ETIMEDOUT; 4601 } 4602 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 4603 WREG32(mmCP_HQD_PQ_RPTR, 0); 4604 WREG32(mmCP_HQD_PQ_WPTR, 0); 4605 4606 return r; 4607 } 4608 4609 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) 4610 { 4611 struct amdgpu_device *adev = ring->adev; 4612 struct vi_mqd *mqd = ring->mqd_ptr; 4613 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4614 uint32_t tmp; 4615 4616 mqd->header = 0xC0310800; 4617 mqd->compute_pipelinestat_enable = 0x00000001; 4618 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4619 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4620 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4621 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4622 mqd->compute_misc_reserved = 0x00000003; 4623 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr 4624 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4625 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr 4626 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4627 eop_base_addr = ring->eop_gpu_addr >> 8; 4628 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4629 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4630 4631 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4632 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4633 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4634 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); 4635 4636 mqd->cp_hqd_eop_control = tmp; 4637 4638 /* enable doorbell? */ 4639 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), 4640 CP_HQD_PQ_DOORBELL_CONTROL, 4641 DOORBELL_EN, 4642 ring->use_doorbell ? 
1 : 0); 4643 4644 mqd->cp_hqd_pq_doorbell_control = tmp; 4645 4646 /* set the pointer to the MQD */ 4647 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 4648 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 4649 4650 /* set MQD vmid to 0 */ 4651 tmp = RREG32(mmCP_MQD_CONTROL); 4652 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4653 mqd->cp_mqd_control = tmp; 4654 4655 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */ 4656 hqd_gpu_addr = ring->gpu_addr >> 8; 4657 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4658 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4659 4660 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4661 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4662 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4663 (order_base_2(ring->ring_size / 4) - 1)); 4664 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4665 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4666 #ifdef __BIG_ENDIAN 4667 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4668 #endif 4669 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4670 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4671 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4672 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4673 mqd->cp_hqd_pq_control = tmp; 4674 4675 /* set the wb address whether it's enabled or not */ 4676 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4677 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4678 mqd->cp_hqd_pq_rptr_report_addr_hi = 4679 upper_32_bits(wb_gpu_addr) & 0xffff; 4680 4681 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4682 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4683 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4684 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4685 4686 tmp = 0; 4687 /* enable the doorbell if requested */ 4688 if (ring->use_doorbell) { 4689 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4690 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4691 DOORBELL_OFFSET, ring->doorbell_index); 4692 4693 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4694 DOORBELL_EN, 1); 4695 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4696 DOORBELL_SOURCE, 0); 4697 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4698 DOORBELL_HIT, 0); 4699 } 4700 4701 mqd->cp_hqd_pq_doorbell_control = tmp; 4702 4703 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4704 ring->wptr = 0; 4705 mqd->cp_hqd_pq_wptr = ring->wptr; 4706 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4707 4708 /* set the vmid for the queue */ 4709 mqd->cp_hqd_vmid = 0; 4710 4711 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4712 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4713 mqd->cp_hqd_persistent_state = tmp; 4714 4715 /* set MTYPE */ 4716 tmp = RREG32(mmCP_HQD_IB_CONTROL); 4717 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4718 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); 4719 mqd->cp_hqd_ib_control = tmp; 4720 4721 tmp = RREG32(mmCP_HQD_IQ_TIMER); 4722 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); 4723 mqd->cp_hqd_iq_timer = tmp; 4724 4725 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); 4726 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); 4727 mqd->cp_hqd_ctx_save_control = tmp; 4728 4729 /* defaults */ 4730 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); 4731 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4732 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); 4733 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); 4734 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); 4735 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); 4736 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); 4737 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); 4738 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); 4739 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); 4740 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); 4741 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); 4742 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); 4743 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); 4744 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); 4745 4746 /* activate the queue */ 4747 mqd->cp_hqd_active = 1; 4748 4749 return 0; 4750 } 4751 4752 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 4753 struct vi_mqd *mqd) 4754 { 4755 uint32_t mqd_reg; 4756 uint32_t *mqd_data; 4757 4758 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ 4759 mqd_data = &mqd->cp_mqd_base_addr_lo; 4760 4761 /* disable wptr polling */ 4762 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); 4763 4764 /* program all HQD registers */ 4765 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++) 4766 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4767 4768 /* Tonga errata: EOP RPTR/WPTR should be left unmodified. 4769 * This is safe since EOP RPTR==WPTR for any inactive HQD 4770 * on ASICs that do not support context-save. 4771 * EOP writes/reads can start anywhere in the ring. 4772 */ 4773 if (adev->asic_type != CHIP_TONGA) { 4774 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); 4775 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); 4776 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); 4777 } 4778 4779 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) 4780 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4781 4782 /* activate the HQD */ 4783 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) 4784 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4785 4786 return 0; 4787 } 4788 4789 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) 4790 { 4791 struct amdgpu_device *adev = ring->adev; 4792 struct vi_mqd *mqd = ring->mqd_ptr; 4793 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 4794 4795 gfx_v8_0_kiq_setting(ring); 4796 4797 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4798 /* reset MQD to a clean status */ 4799 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4800 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4801 4802 /* reset ring buffer */ 4803 ring->wptr = 0; 4804 amdgpu_ring_clear_ring(ring); 4805 mutex_lock(&adev->srbm_mutex); 4806 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4807 gfx_v8_0_mqd_commit(adev, mqd); 4808 vi_srbm_select(adev, 0, 0, 0, 0); 4809 mutex_unlock(&adev->srbm_mutex); 4810 } else { 4811 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4812 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4813 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4814 mutex_lock(&adev->srbm_mutex); 4815 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4816 gfx_v8_0_mqd_init(ring); 4817 gfx_v8_0_mqd_commit(adev, mqd); 4818 vi_srbm_select(adev, 0, 0, 0, 0); 4819 mutex_unlock(&adev->srbm_mutex); 4820 4821 if 
(adev->gfx.mec.mqd_backup[mqd_idx]) 4822 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4823 } 4824 4825 return 0; 4826 } 4827 4828 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) 4829 { 4830 struct amdgpu_device *adev = ring->adev; 4831 struct vi_mqd *mqd = ring->mqd_ptr; 4832 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4833 4834 if (!adev->in_gpu_reset && !adev->gfx.in_suspend) { 4835 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4836 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4837 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4838 mutex_lock(&adev->srbm_mutex); 4839 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4840 gfx_v8_0_mqd_init(ring); 4841 vi_srbm_select(adev, 0, 0, 0, 0); 4842 mutex_unlock(&adev->srbm_mutex); 4843 4844 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4845 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4846 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4847 /* reset MQD to a clean status */ 4848 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4849 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4850 } else { 4851 amdgpu_ring_clear_ring(ring); 4852 } 4853 return 0; 4854 } 4855 4856 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) 4857 { 4858 if (adev->asic_type > CHIP_TONGA) { 4859 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); 4860 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); 4861 } 4862 /* enable doorbells */ 4863 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4864 } 4865 4866 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) 4867 { 4868 struct amdgpu_ring *ring = NULL; 4869 int r = 0, i; 4870 4871 gfx_v8_0_cp_compute_enable(adev, true); 4872 4873 ring = &adev->gfx.kiq.ring; 4874 4875 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4876 if (unlikely(r != 0)) 4877 goto done; 4878 4879 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4880 if (!r) { 4881 r = gfx_v8_0_kiq_init_queue(ring); 4882 amdgpu_bo_kunmap(ring->mqd_obj); 4883 ring->mqd_ptr = NULL; 4884 } 4885 amdgpu_bo_unreserve(ring->mqd_obj); 4886 if (r) 4887 goto done; 4888 4889 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4890 ring = &adev->gfx.compute_ring[i]; 4891 4892 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4893 if (unlikely(r != 0)) 4894 goto done; 4895 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4896 if (!r) { 4897 r = gfx_v8_0_kcq_init_queue(ring); 4898 amdgpu_bo_kunmap(ring->mqd_obj); 4899 ring->mqd_ptr = NULL; 4900 } 4901 amdgpu_bo_unreserve(ring->mqd_obj); 4902 if (r) 4903 goto done; 4904 } 4905 4906 gfx_v8_0_set_mec_doorbell_range(adev); 4907 4908 r = gfx_v8_0_kiq_kcq_enable(adev); 4909 if (r) 4910 goto done; 4911 4912 /* Test KIQ */ 4913 ring = &adev->gfx.kiq.ring; 4914 ring->ready = true; 4915 r = amdgpu_ring_test_ring(ring); 4916 if (r) { 4917 ring->ready = false; 4918 goto done; 4919 } 4920 4921 /* Test KCQs */ 4922 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4923 ring = &adev->gfx.compute_ring[i]; 4924 if (adev->in_gpu_reset) { 4925 /* reset the ring buffer here to work around 4926 * compute ring test failures after a GPU reset 4927 */ 4928 ring->wptr = 0; 4929 amdgpu_ring_clear_ring(ring); 4930 } 4931 ring->ready = true; 4932 r = amdgpu_ring_test_ring(ring); 4933 if (r) 4934 ring->ready = false; 4935 } 4936 4937 done: 4938 return r; 4939 } 4940 4941 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 4942 { 4943 int r;
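/* With direct (legacy) firmware loading the driver writes the CP
 * microcode itself below; otherwise the firmware is expected to
 * already be in place (e.g. loaded by the SMU). Either way the GFX
 * ring is brought up first, then the KIQ and the compute queues.
 */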
4944 4945 if (!(adev->flags & AMD_IS_APU)) 4946 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4947 4948 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4949 /* legacy firmware loading */ 4950 r = gfx_v8_0_cp_gfx_load_microcode(adev); 4951 if (r) 4952 return r; 4953 4954 r = gfx_v8_0_cp_compute_load_microcode(adev); 4955 if (r) 4956 return r; 4957 } 4958 4959 r = gfx_v8_0_cp_gfx_resume(adev); 4960 if (r) 4961 return r; 4962 4963 r = gfx_v8_0_kiq_resume(adev); 4964 if (r) 4965 return r; 4966 4967 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 4968 4969 return 0; 4970 } 4971 4972 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 4973 { 4974 gfx_v8_0_cp_gfx_enable(adev, enable); 4975 gfx_v8_0_cp_compute_enable(adev, enable); 4976 } 4977 4978 static int gfx_v8_0_hw_init(void *handle) 4979 { 4980 int r; 4981 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4982 4983 gfx_v8_0_init_golden_registers(adev); 4984 gfx_v8_0_gpu_init(adev); 4985 4986 r = gfx_v8_0_rlc_resume(adev); 4987 if (r) 4988 return r; 4989 4990 r = gfx_v8_0_cp_resume(adev); 4991 4992 return r; 4993 } 4994 4995 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring) 4996 { 4997 struct amdgpu_device *adev = kiq_ring->adev; 4998 uint32_t scratch, tmp = 0; 4999 int r, i; 5000 5001 r = amdgpu_gfx_scratch_get(adev, &scratch); 5002 if (r) { 5003 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 5004 return r; 5005 } 5006 WREG32(scratch, 0xCAFEDEAD); 5007 5008 r = amdgpu_ring_alloc(kiq_ring, 10); 5009 if (r) { 5010 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 5011 amdgpu_gfx_scratch_free(adev, scratch); 5012 return r; 5013 } 5014 5015 /* unmap queues */ 5016 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 5017 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 5018 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 5019 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 5020 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 5021 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 5022 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 5023 amdgpu_ring_write(kiq_ring, 0); 5024 amdgpu_ring_write(kiq_ring, 0); 5025 amdgpu_ring_write(kiq_ring, 0); 5026 /* write to scratch for completion */ 5027 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 5028 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 5029 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 5030 amdgpu_ring_commit(kiq_ring); 5031 5032 for (i = 0; i < adev->usec_timeout; i++) { 5033 tmp = RREG32(scratch); 5034 if (tmp == 0xDEADBEEF) 5035 break; 5036 DRM_UDELAY(1); 5037 } 5038 if (i >= adev->usec_timeout) { 5039 DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); 5040 r = -EINVAL; 5041 } 5042 amdgpu_gfx_scratch_free(adev, scratch); 5043 return r; 5044 } 5045 5046 static int gfx_v8_0_hw_fini(void *handle) 5047 { 5048 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5049 int i; 5050 5051 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5052 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5053 5054 /* disable the KCQs so the CPC stops touching memory that may soon become invalid */ 5055 for (i = 0; i < adev->gfx.num_compute_rings; i++) 5056 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); 5057 5058 if (amdgpu_sriov_vf(adev)) { 5059 pr_debug("For an SR-IOV client, nothing more should be done here.\n"); 5060 return 0; 5061 } 5062 gfx_v8_0_cp_enable(adev, false); 5063 gfx_v8_0_rlc_stop(adev); 5064
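/* make sure GFX power gating is ungated before the block is fully torn down */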
amdgpu_device_ip_set_powergating_state(adev, 5066 AMD_IP_BLOCK_TYPE_GFX, 5067 AMD_PG_STATE_UNGATE); 5068 5069 return 0; 5070 } 5071 5072 static int gfx_v8_0_suspend(void *handle) 5073 { 5074 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5075 adev->gfx.in_suspend = true; 5076 return gfx_v8_0_hw_fini(adev); 5077 } 5078 5079 static int gfx_v8_0_resume(void *handle) 5080 { 5081 int r; 5082 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5083 5084 r = gfx_v8_0_hw_init(adev); 5085 adev->gfx.in_suspend = false; 5086 return r; 5087 } 5088 5089 static bool gfx_v8_0_is_idle(void *handle) 5090 { 5091 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5092 5093 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5094 return false; 5095 else 5096 return true; 5097 } 5098 5099 static int gfx_v8_0_wait_for_idle(void *handle) 5100 { 5101 unsigned i; 5102 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5103 5104 for (i = 0; i < adev->usec_timeout; i++) { 5105 if (gfx_v8_0_is_idle(handle)) 5106 return 0; 5107 5108 udelay(1); 5109 } 5110 return -ETIMEDOUT; 5111 } 5112 5113 static bool gfx_v8_0_check_soft_reset(void *handle) 5114 { 5115 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5116 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5117 u32 tmp; 5118 5119 /* GRBM_STATUS */ 5120 tmp = RREG32(mmGRBM_STATUS); 5121 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5122 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5123 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5124 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5125 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5126 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5127 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 5128 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5129 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 5130 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5131 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 5132 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5133 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5134 } 5135 5136 /* GRBM_STATUS2 */ 5137 tmp = RREG32(mmGRBM_STATUS2); 5138 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 5139 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5140 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 5141 5142 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 5143 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 5144 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 5145 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5146 SOFT_RESET_CPF, 1); 5147 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5148 SOFT_RESET_CPC, 1); 5149 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5150 SOFT_RESET_CPG, 1); 5151 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 5152 SOFT_RESET_GRBM, 1); 5153 } 5154 5155 /* SRBM_STATUS */ 5156 tmp = RREG32(mmSRBM_STATUS); 5157 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 5158 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5159 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5160 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 5161 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5162 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5163 5164 if (grbm_soft_reset || srbm_soft_reset) { 5165 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5166 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5167 return true; 5168 } else { 5169 adev->gfx.grbm_soft_reset = 0; 5170 adev->gfx.srbm_soft_reset = 0; 5171 return false; 
5172 } 5173 } 5174 5175 static int gfx_v8_0_pre_soft_reset(void *handle) 5176 { 5177 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5178 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5179 5180 if ((!adev->gfx.grbm_soft_reset) && 5181 (!adev->gfx.srbm_soft_reset)) 5182 return 0; 5183 5184 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5185 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5186 5187 /* stop the rlc */ 5188 gfx_v8_0_rlc_stop(adev); 5189 5190 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5191 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5192 /* Disable GFX parsing/prefetching */ 5193 gfx_v8_0_cp_gfx_enable(adev, false); 5194 5195 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5196 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5197 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5198 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5199 int i; 5200 5201 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5202 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5203 5204 mutex_lock(&adev->srbm_mutex); 5205 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5206 gfx_v8_0_deactivate_hqd(adev, 2); 5207 vi_srbm_select(adev, 0, 0, 0, 0); 5208 mutex_unlock(&adev->srbm_mutex); 5209 } 5210 /* Disable MEC parsing/prefetching */ 5211 gfx_v8_0_cp_compute_enable(adev, false); 5212 } 5213 5214 return 0; 5215 } 5216 5217 static int gfx_v8_0_soft_reset(void *handle) 5218 { 5219 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5220 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5221 u32 tmp; 5222 5223 if ((!adev->gfx.grbm_soft_reset) && 5224 (!adev->gfx.srbm_soft_reset)) 5225 return 0; 5226 5227 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5228 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5229 5230 if (grbm_soft_reset || srbm_soft_reset) { 5231 tmp = RREG32(mmGMCON_DEBUG); 5232 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5233 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5234 WREG32(mmGMCON_DEBUG, tmp); 5235 udelay(50); 5236 } 5237 5238 if (grbm_soft_reset) { 5239 tmp = RREG32(mmGRBM_SOFT_RESET); 5240 tmp |= grbm_soft_reset; 5241 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5242 WREG32(mmGRBM_SOFT_RESET, tmp); 5243 tmp = RREG32(mmGRBM_SOFT_RESET); 5244 5245 udelay(50); 5246 5247 tmp &= ~grbm_soft_reset; 5248 WREG32(mmGRBM_SOFT_RESET, tmp); 5249 tmp = RREG32(mmGRBM_SOFT_RESET); 5250 } 5251 5252 if (srbm_soft_reset) { 5253 tmp = RREG32(mmSRBM_SOFT_RESET); 5254 tmp |= srbm_soft_reset; 5255 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5256 WREG32(mmSRBM_SOFT_RESET, tmp); 5257 tmp = RREG32(mmSRBM_SOFT_RESET); 5258 5259 udelay(50); 5260 5261 tmp &= ~srbm_soft_reset; 5262 WREG32(mmSRBM_SOFT_RESET, tmp); 5263 tmp = RREG32(mmSRBM_SOFT_RESET); 5264 } 5265 5266 if (grbm_soft_reset || srbm_soft_reset) { 5267 tmp = RREG32(mmGMCON_DEBUG); 5268 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5269 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5270 WREG32(mmGMCON_DEBUG, tmp); 5271 } 5272 5273 /* Wait a little for things to settle down */ 5274 udelay(50); 5275 5276 return 0; 5277 } 5278 5279 static int gfx_v8_0_post_soft_reset(void *handle) 5280 { 5281 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5282 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5283 5284 if ((!adev->gfx.grbm_soft_reset) && 5285 (!adev->gfx.srbm_soft_reset)) 5286 return 0; 5287 5288 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5289 
srbm_soft_reset = adev->gfx.srbm_soft_reset; 5290 5291 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5292 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5293 gfx_v8_0_cp_gfx_resume(adev); 5294 5295 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5296 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5297 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5298 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5299 int i; 5300 5301 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5302 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5303 5304 mutex_lock(&adev->srbm_mutex); 5305 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5306 gfx_v8_0_deactivate_hqd(adev, 2); 5307 vi_srbm_select(adev, 0, 0, 0, 0); 5308 mutex_unlock(&adev->srbm_mutex); 5309 } 5310 gfx_v8_0_kiq_resume(adev); 5311 } 5312 gfx_v8_0_rlc_start(adev); 5313 5314 return 0; 5315 } 5316 5317 /** 5318 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5319 * 5320 * @adev: amdgpu_device pointer 5321 * 5322 * Fetches a GPU clock counter snapshot. 5323 * Returns the 64 bit clock counter snapshot. 5324 */ 5325 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5326 { 5327 uint64_t clock; 5328 5329 mutex_lock(&adev->gfx.gpu_clock_mutex); 5330 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5331 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5332 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5333 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5334 return clock; 5335 } 5336 5337 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5338 uint32_t vmid, 5339 uint32_t gds_base, uint32_t gds_size, 5340 uint32_t gws_base, uint32_t gws_size, 5341 uint32_t oa_base, uint32_t oa_size) 5342 { 5343 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 5344 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 5345 5346 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 5347 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 5348 5349 oa_base = oa_base >> AMDGPU_OA_SHIFT; 5350 oa_size = oa_size >> AMDGPU_OA_SHIFT; 5351 5352 /* GDS Base */ 5353 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5354 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5355 WRITE_DATA_DST_SEL(0))); 5356 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5357 amdgpu_ring_write(ring, 0); 5358 amdgpu_ring_write(ring, gds_base); 5359 5360 /* GDS Size */ 5361 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5362 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5363 WRITE_DATA_DST_SEL(0))); 5364 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5365 amdgpu_ring_write(ring, 0); 5366 amdgpu_ring_write(ring, gds_size); 5367 5368 /* GWS */ 5369 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5370 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5371 WRITE_DATA_DST_SEL(0))); 5372 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5373 amdgpu_ring_write(ring, 0); 5374 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5375 5376 /* OA */ 5377 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5378 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5379 WRITE_DATA_DST_SEL(0))); 5380 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5381 amdgpu_ring_write(ring, 0); 5382 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5383 } 5384 5385 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t 
wave, uint32_t address) 5386 { 5387 WREG32(mmSQ_IND_INDEX, 5388 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5389 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5390 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5391 (SQ_IND_INDEX__FORCE_READ_MASK)); 5392 return RREG32(mmSQ_IND_DATA); 5393 } 5394 5395 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5396 uint32_t wave, uint32_t thread, 5397 uint32_t regno, uint32_t num, uint32_t *out) 5398 { 5399 WREG32(mmSQ_IND_INDEX, 5400 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5401 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5402 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5403 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5404 (SQ_IND_INDEX__FORCE_READ_MASK) | 5405 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5406 while (num--) 5407 *(out++) = RREG32(mmSQ_IND_DATA); 5408 } 5409 5410 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5411 { 5412 /* type 0 wave data */ 5413 dst[(*no_fields)++] = 0; 5414 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5415 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5416 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 5417 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5418 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5419 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5420 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5421 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5422 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5423 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5424 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5425 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5426 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5427 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5428 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5429 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5430 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5431 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5432 } 5433 5434 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5435 uint32_t wave, uint32_t start, 5436 uint32_t size, uint32_t *dst) 5437 { 5438 wave_read_regs( 5439 adev, simd, wave, 0, 5440 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5441 } 5442 5443 5444 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5445 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5446 .select_se_sh = &gfx_v8_0_select_se_sh, 5447 .read_wave_data = &gfx_v8_0_read_wave_data, 5448 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5449 }; 5450 5451 static int gfx_v8_0_early_init(void *handle) 5452 { 5453 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5454 5455 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5456 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 5457 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5458 gfx_v8_0_set_ring_funcs(adev); 5459 gfx_v8_0_set_irq_funcs(adev); 5460 gfx_v8_0_set_gds_init(adev); 5461 gfx_v8_0_set_rlc_funcs(adev); 5462 5463 return 0; 5464 } 5465 5466 static int gfx_v8_0_late_init(void *handle) 5467 { 5468 struct amdgpu_device *adev = (struct 
amdgpu_device *)handle; 5469 int r; 5470 5471 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5472 if (r) 5473 return r; 5474 5475 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5476 if (r) 5477 return r; 5478 5479 /* requires IBs so do in late init after IB pool is initialized */ 5480 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5481 if (r) 5482 return r; 5483 5484 amdgpu_device_ip_set_powergating_state(adev, 5485 AMD_IP_BLOCK_TYPE_GFX, 5486 AMD_PG_STATE_GATE); 5487 5488 return 0; 5489 } 5490 5491 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5492 bool enable) 5493 { 5494 if ((adev->asic_type == CHIP_POLARIS11) || 5495 (adev->asic_type == CHIP_POLARIS12)) 5496 /* Send msg to SMU via Powerplay */ 5497 amdgpu_device_ip_set_powergating_state(adev, 5498 AMD_IP_BLOCK_TYPE_SMC, 5499 enable ? 5500 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); 5501 5502 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5503 } 5504 5505 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5506 bool enable) 5507 { 5508 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0); 5509 } 5510 5511 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5512 bool enable) 5513 { 5514 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5515 } 5516 5517 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5518 bool enable) 5519 { 5520 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5521 } 5522 5523 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5524 bool enable) 5525 { 5526 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5527 5528 /* Read any GFX register to wake up GFX. 
*/ 5529 if (!enable) 5530 RREG32(mmDB_RENDER_CONTROL); 5531 } 5532 5533 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5534 bool enable) 5535 { 5536 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5537 cz_enable_gfx_cg_power_gating(adev, true); 5538 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5539 cz_enable_gfx_pipeline_power_gating(adev, true); 5540 } else { 5541 cz_enable_gfx_cg_power_gating(adev, false); 5542 cz_enable_gfx_pipeline_power_gating(adev, false); 5543 } 5544 } 5545 5546 static int gfx_v8_0_set_powergating_state(void *handle, 5547 enum amd_powergating_state state) 5548 { 5549 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5550 bool enable = (state == AMD_PG_STATE_GATE); 5551 5552 if (amdgpu_sriov_vf(adev)) 5553 return 0; 5554 5555 switch (adev->asic_type) { 5556 case CHIP_CARRIZO: 5557 case CHIP_STONEY: 5558 5559 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5560 cz_enable_sck_slow_down_on_power_up(adev, true); 5561 cz_enable_sck_slow_down_on_power_down(adev, true); 5562 } else { 5563 cz_enable_sck_slow_down_on_power_up(adev, false); 5564 cz_enable_sck_slow_down_on_power_down(adev, false); 5565 } 5566 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5567 cz_enable_cp_power_gating(adev, true); 5568 else 5569 cz_enable_cp_power_gating(adev, false); 5570 5571 cz_update_gfx_cg_power_gating(adev, enable); 5572 5573 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5574 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5575 else 5576 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5577 5578 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5579 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5580 else 5581 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5582 break; 5583 case CHIP_POLARIS11: 5584 case CHIP_POLARIS12: 5585 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5586 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5587 else 5588 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5589 5590 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5591 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5592 else 5593 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5594 5595 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) 5596 polaris11_enable_gfx_quick_mg_power_gating(adev, true); 5597 else 5598 polaris11_enable_gfx_quick_mg_power_gating(adev, false); 5599 break; 5600 default: 5601 break; 5602 } 5603 5604 return 0; 5605 } 5606 5607 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) 5608 { 5609 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5610 int data; 5611 5612 if (amdgpu_sriov_vf(adev)) 5613 *flags = 0; 5614 5615 /* AMD_CG_SUPPORT_GFX_MGCG */ 5616 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5617 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) 5618 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5619 5620 /* AMD_CG_SUPPORT_GFX_CGCG */ 5621 data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5622 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5623 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5624 5625 /* AMD_CG_SUPPORT_GFX_CGLS */ 5626 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5627 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5628 5629 /* AMD_CG_SUPPORT_GFX_CGTS */ 5630 data = RREG32(mmCGTS_SM_CTRL_REG); 5631 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK)) 5632 *flags |= AMD_CG_SUPPORT_GFX_CGTS; 5633 5634 /* AMD_CG_SUPPORT_GFX_CGTS_LS */ 5635 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK)) 5636 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS; 5637 5638 /*
AMD_CG_SUPPORT_GFX_RLC_LS */ 5639 data = RREG32(mmRLC_MEM_SLP_CNTL); 5640 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5641 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5642 5643 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5644 data = RREG32(mmCP_MEM_SLP_CNTL); 5645 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5646 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5647 } 5648 5649 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5650 uint32_t reg_addr, uint32_t cmd) 5651 { 5652 uint32_t data; 5653 5654 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5655 5656 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5657 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5658 5659 data = RREG32(mmRLC_SERDES_WR_CTRL); 5660 if (adev->asic_type == CHIP_STONEY) 5661 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5662 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5663 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5664 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5665 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5666 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5667 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5668 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5669 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5670 else 5671 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5672 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5673 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5674 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5675 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5676 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5677 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5678 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5679 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5680 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5681 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5682 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5683 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5684 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5685 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5686 5687 WREG32(mmRLC_SERDES_WR_CTRL, data); 5688 } 5689 5690 #define MSG_ENTER_RLC_SAFE_MODE 1 5691 #define MSG_EXIT_RLC_SAFE_MODE 0 5692 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5693 #define RLC_GPR_REG2__REQ__SHIFT 0 5694 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5695 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5696 5697 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5698 { 5699 u32 data; 5700 unsigned i; 5701 5702 data = RREG32(mmRLC_CNTL); 5703 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5704 return; 5705 5706 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5707 data |= RLC_SAFE_MODE__CMD_MASK; 5708 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5709 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5710 WREG32(mmRLC_SAFE_MODE, data); 5711 5712 for (i = 0; i < adev->usec_timeout; i++) { 5713 if ((RREG32(mmRLC_GPM_STAT) & 5714 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5715 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5716 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5717 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5718 break; 5719 udelay(1); 5720 } 5721 5722 for (i = 0; i < adev->usec_timeout; i++) { 5723 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5724 break; 5725 udelay(1); 5726 } 5727 adev->gfx.rlc.in_safe_mode = true; 5728 } 5729 } 5730 5731 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5732 { 5733 u32 data = 0; 5734 unsigned i; 5735 5736 data = RREG32(mmRLC_CNTL); 5737 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5738 return; 5739 5740 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) 
{ 5741 if (adev->gfx.rlc.in_safe_mode) { 5742 data |= RLC_SAFE_MODE__CMD_MASK; 5743 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5744 WREG32(mmRLC_SAFE_MODE, data); 5745 adev->gfx.rlc.in_safe_mode = false; 5746 } 5747 } 5748 5749 for (i = 0; i < adev->usec_timeout; i++) { 5750 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5751 break; 5752 udelay(1); 5753 } 5754 } 5755 5756 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5757 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5758 .exit_safe_mode = iceland_exit_rlc_safe_mode 5759 }; 5760 5761 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5762 bool enable) 5763 { 5764 uint32_t temp, data; 5765 5766 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5767 5768 /* It is disabled by HW by default */ 5769 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5770 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5771 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 5772 /* 1 - RLC memory Light sleep */ 5773 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 5774 5775 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 5776 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 5777 } 5778 5779 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5780 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5781 if (adev->flags & AMD_IS_APU) 5782 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5783 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5784 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5785 else 5786 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5787 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5788 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5789 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5790 5791 if (temp != data) 5792 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5793 5794 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5795 gfx_v8_0_wait_for_rlc_serdes(adev); 5796 5797 /* 5 - clear mgcg override */ 5798 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5799 5800 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5801 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 5802 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5803 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5804 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5805 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 5806 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 5807 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 5808 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 5809 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 5810 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 5811 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 5812 if (temp != data) 5813 WREG32(mmCGTS_SM_CTRL_REG, data); 5814 } 5815 udelay(50); 5816 5817 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5818 gfx_v8_0_wait_for_rlc_serdes(adev); 5819 } else { 5820 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 5821 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5822 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5823 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5824 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5825 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5826 if (temp != data) 5827 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5828 5829 /* 2 - disable MGLS in RLC */ 5830 data = RREG32(mmRLC_MEM_SLP_CNTL); 5831 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5832 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5833 WREG32(mmRLC_MEM_SLP_CNTL, data); 5834 } 5835 5836 /* 3 - disable MGLS in CP */ 5837 data = RREG32(mmCP_MEM_SLP_CNTL); 5838 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5839 data &= 
~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5840 WREG32(mmCP_MEM_SLP_CNTL, data); 5841 } 5842 5843 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 5844 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5845 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 5846 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 5847 if (temp != data) 5848 WREG32(mmCGTS_SM_CTRL_REG, data); 5849 5850 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5851 gfx_v8_0_wait_for_rlc_serdes(adev); 5852 5853 /* 6 - set mgcg override */ 5854 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5855 5856 udelay(50); 5857 5858 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5859 gfx_v8_0_wait_for_rlc_serdes(adev); 5860 } 5861 5862 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5863 } 5864 5865 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5866 bool enable) 5867 { 5868 uint32_t temp, temp1, data, data1; 5869 5870 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5871 5872 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5873 5874 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5875 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5876 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 5877 if (temp1 != data1) 5878 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5879 5880 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5881 gfx_v8_0_wait_for_rlc_serdes(adev); 5882 5883 /* 2 - clear cgcg override */ 5884 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5885 5886 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5887 gfx_v8_0_wait_for_rlc_serdes(adev); 5888 5889 /* 3 - write cmd to set CGLS */ 5890 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 5891 5892 /* 4 - enable cgcg */ 5893 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5894 5895 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5896 /* enable cgls */ 5897 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5898 5899 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5900 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 5901 5902 if (temp1 != data1) 5903 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5904 } else { 5905 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5906 } 5907 5908 if (temp != data) 5909 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5910 5911 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/ 5912 * Cmp_busy/GFX_Idle interrupts 5913 */ 5914 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5915 } else { 5916 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 5917 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5918 5919 /* TEST CGCG */ 5920 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5921 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 5922 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 5923 if (temp1 != data1) 5924 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5925 5926 /* read gfx register to wake up cgcg */ 5927 RREG32(mmCB_CGTT_SCLK_CTRL); 5928 RREG32(mmCB_CGTT_SCLK_CTRL); 5929 RREG32(mmCB_CGTT_SCLK_CTRL); 5930 RREG32(mmCB_CGTT_SCLK_CTRL); 5931 5932 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5933 gfx_v8_0_wait_for_rlc_serdes(adev); 5934 5935 /* write cmd to Set CGCG Override */ 5936 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5937 5938 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5939 gfx_v8_0_wait_for_rlc_serdes(adev); 5940 5941 /* write cmd to Clear CGLS */ 5942 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 5943 5944 /* disable cgcg, cgls should be
disabled too. */ 5945 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 5946 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5947 if (temp != data) 5948 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5949 /* enable interrupts again for PG */ 5950 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5951 } 5952 5953 gfx_v8_0_wait_for_rlc_serdes(adev); 5954 5955 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5956 } 5957 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5958 bool enable) 5959 { 5960 if (enable) { 5961 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 5962 * === MGCG + MGLS + TS(CG/LS) === 5963 */ 5964 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5965 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5966 } else { 5967 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 5968 * === CGCG + CGLS === 5969 */ 5970 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5971 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5972 } 5973 return 0; 5974 } 5975 5976 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 5977 enum amd_clockgating_state state) 5978 { 5979 uint32_t msg_id, pp_state = 0; 5980 uint32_t pp_support_state = 0; 5981 5982 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 5983 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5984 pp_support_state = PP_STATE_SUPPORT_LS; 5985 pp_state = PP_STATE_LS; 5986 } 5987 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5988 pp_support_state |= PP_STATE_SUPPORT_CG; 5989 pp_state |= PP_STATE_CG; 5990 } 5991 if (state == AMD_CG_STATE_UNGATE) 5992 pp_state = 0; 5993 5994 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5995 PP_BLOCK_GFX_CG, 5996 pp_support_state, 5997 pp_state); 5998 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 5999 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6000 } 6001 6002 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6003 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6004 pp_support_state = PP_STATE_SUPPORT_LS; 6005 pp_state = PP_STATE_LS; 6006 } 6007 6008 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6009 pp_support_state |= PP_STATE_SUPPORT_CG; 6010 pp_state |= PP_STATE_CG; 6011 } 6012 6013 if (state == AMD_CG_STATE_UNGATE) 6014 pp_state = 0; 6015 6016 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6017 PP_BLOCK_GFX_MG, 6018 pp_support_state, 6019 pp_state); 6020 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6021 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6022 } 6023 6024 return 0; 6025 } 6026 6027 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 6028 enum amd_clockgating_state state) 6029 { 6030 6031 uint32_t msg_id, pp_state = 0; 6032 uint32_t pp_support_state = 0; 6033 6034 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6035 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6036 pp_support_state = PP_STATE_SUPPORT_LS; 6037 pp_state = PP_STATE_LS; 6038 } 6039 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6040 pp_support_state |= PP_STATE_SUPPORT_CG; 6041 pp_state |= PP_STATE_CG; 6042 } 6043 if (state == AMD_CG_STATE_UNGATE) 6044 pp_state = 0; 6045 6046 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6047 PP_BLOCK_GFX_CG, 6048 pp_support_state, 6049 pp_state); 6050 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6051 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6052 } 6053 6054 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 6055 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 6056 pp_support_state = 
PP_STATE_SUPPORT_LS; 6057 pp_state = PP_STATE_LS; 6058 } 6059 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 6060 pp_support_state |= PP_STATE_SUPPORT_CG; 6061 pp_state |= PP_STATE_CG; 6062 } 6063 if (state == AMD_CG_STATE_UNGATE) 6064 pp_state = 0; 6065 6066 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6067 PP_BLOCK_GFX_3D, 6068 pp_support_state, 6069 pp_state); 6070 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6071 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6072 } 6073 6074 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6075 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6076 pp_support_state = PP_STATE_SUPPORT_LS; 6077 pp_state = PP_STATE_LS; 6078 } 6079 6080 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6081 pp_support_state |= PP_STATE_SUPPORT_CG; 6082 pp_state |= PP_STATE_CG; 6083 } 6084 6085 if (state == AMD_CG_STATE_UNGATE) 6086 pp_state = 0; 6087 6088 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6089 PP_BLOCK_GFX_MG, 6090 pp_support_state, 6091 pp_state); 6092 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6093 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6094 } 6095 6096 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 6097 pp_support_state = PP_STATE_SUPPORT_LS; 6098 6099 if (state == AMD_CG_STATE_UNGATE) 6100 pp_state = 0; 6101 else 6102 pp_state = PP_STATE_LS; 6103 6104 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6105 PP_BLOCK_GFX_RLC, 6106 pp_support_state, 6107 pp_state); 6108 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6109 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6110 } 6111 6112 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 6113 pp_support_state = PP_STATE_SUPPORT_LS; 6114 6115 if (state == AMD_CG_STATE_UNGATE) 6116 pp_state = 0; 6117 else 6118 pp_state = PP_STATE_LS; 6119 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6120 PP_BLOCK_GFX_CP, 6121 pp_support_state, 6122 pp_state); 6123 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6124 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6125 } 6126 6127 return 0; 6128 } 6129 6130 static int gfx_v8_0_set_clockgating_state(void *handle, 6131 enum amd_clockgating_state state) 6132 { 6133 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6134 6135 if (amdgpu_sriov_vf(adev)) 6136 return 0; 6137 6138 switch (adev->asic_type) { 6139 case CHIP_FIJI: 6140 case CHIP_CARRIZO: 6141 case CHIP_STONEY: 6142 gfx_v8_0_update_gfx_clock_gating(adev, 6143 state == AMD_CG_STATE_GATE); 6144 break; 6145 case CHIP_TONGA: 6146 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6147 break; 6148 case CHIP_POLARIS10: 6149 case CHIP_POLARIS11: 6150 case CHIP_POLARIS12: 6151 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6152 break; 6153 default: 6154 break; 6155 } 6156 return 0; 6157 } 6158 6159 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6160 { 6161 return ring->adev->wb.wb[ring->rptr_offs]; 6162 } 6163 6164 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6165 { 6166 struct amdgpu_device *adev = ring->adev; 6167 6168 if (ring->use_doorbell) 6169 /* XXX check if swapping is necessary on BE */ 6170 return ring->adev->wb.wb[ring->wptr_offs]; 6171 else 6172 return RREG32(mmCP_RB0_WPTR); 6173 } 6174 6175 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 6176 { 6177 struct amdgpu_device *adev = ring->adev; 6178 6179 if (ring->use_doorbell) { 6180 /* XXX check if swapping is necessary on BE */ 6181 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6182 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6183 } else { 6184 
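/* no doorbell: write the wptr via MMIO and read it back to flush the posted write */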
WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 6185 (void)RREG32(mmCP_RB0_WPTR); 6186 } 6187 } 6188 6189 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 6190 { 6191 u32 ref_and_mask, reg_mem_engine; 6192 6193 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || 6194 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { 6195 switch (ring->me) { 6196 case 1: 6197 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6198 break; 6199 case 2: 6200 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6201 break; 6202 default: 6203 return; 6204 } 6205 reg_mem_engine = 0; 6206 } else { 6207 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6208 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6209 } 6210 6211 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6212 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6213 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6214 reg_mem_engine)); 6215 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 6216 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6217 amdgpu_ring_write(ring, ref_and_mask); 6218 amdgpu_ring_write(ring, ref_and_mask); 6219 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6220 } 6221 6222 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) 6223 { 6224 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6225 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) | 6226 EVENT_INDEX(4)); 6227 6228 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6229 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) | 6230 EVENT_INDEX(0)); 6231 } 6232 6233 6234 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) 6235 { 6236 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6237 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6238 WRITE_DATA_DST_SEL(0) | 6239 WR_CONFIRM)); 6240 amdgpu_ring_write(ring, mmHDP_DEBUG0); 6241 amdgpu_ring_write(ring, 0); 6242 amdgpu_ring_write(ring, 1); 6243 6244 } 6245 6246 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6247 struct amdgpu_ib *ib, 6248 unsigned vmid, bool ctx_switch) 6249 { 6250 u32 header, control = 0; 6251 6252 if (ib->flags & AMDGPU_IB_FLAG_CE) 6253 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 6254 else 6255 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 6256 6257 control |= ib->length_dw | (vmid << 24); 6258 6259 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 6260 control |= INDIRECT_BUFFER_PRE_ENB(1); 6261 6262 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 6263 gfx_v8_0_ring_emit_de_meta(ring); 6264 } 6265 6266 amdgpu_ring_write(ring, header); 6267 amdgpu_ring_write(ring, 6268 #ifdef __BIG_ENDIAN 6269 (2 << 0) | 6270 #endif 6271 (ib->gpu_addr & 0xFFFFFFFC)); 6272 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6273 amdgpu_ring_write(ring, control); 6274 } 6275 6276 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 6277 struct amdgpu_ib *ib, 6278 unsigned vmid, bool ctx_switch) 6279 { 6280 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 6281 6282 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 6283 amdgpu_ring_write(ring, 6284 #ifdef __BIG_ENDIAN 6285 (2 << 0) | 6286 #endif 6287 (ib->gpu_addr & 0xFFFFFFFC)); 6288 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6289 amdgpu_ring_write(ring, control); 6290 } 6291 6292 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, 6293 u64 seq, unsigned flags) 6294 { 6295 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6296 
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vmid < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vmid);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}
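
/**
 * gfx_v8_0_ring_set_wptr_compute - commit the compute ring write pointer
 *
 * Compute rings go through the doorbell: the new wptr is mirrored into the
 * writeback page and then posted to the ring's doorbell so the CP picks up
 * the freshly written packets.
 */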
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}

static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}

static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
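
/**
 * gfx_v8_0_hqd_set_priority - program HQD priority for a compute ring
 *
 * Selects the ring's me/pipe/queue via SRBM and writes the pipe and queue
 * priority into the HQD registers: 0x2/0xf while high priority is held,
 * 0x0/0x0 once it is released.
 */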
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
					       enum drm_sched_priority priority)
{
	struct amdgpu_device *adev = ring->adev;
	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;

	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return;

	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
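
/**
 * gfx_v8_ring_emit_cntxcntl - emit a CONTEXT_CONTROL packet
 *
 * Assembles the dw2 payload for PACKET3_CONTEXT_CONTROL.  The individual
 * bits select which state blocks (global config, CS/GFX SH registers,
 * per-context state, CE RAM) the CP reloads on the next context switch;
 * load_enable (bit 31) must be set or the packet is effectively a NOP.
 */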
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time a preamble
		 * is presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
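
/**
 * gfx_v8_0_set_compute_eop_interrupt_state - toggle EOP interrupts for a MEC pipe
 *
 * Enables or disables the TIME_STAMP (EOP) interrupt in the per-pipe
 * CP_ME1_PIPEn_INT_CNTL register.  Only MEC1 is handled here; see the
 * comment in the body for why amdkfd owns the other pipes.
 */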
6644 */ 6645 6646 if (me == 1) { 6647 switch (pipe) { 6648 case 0: 6649 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; 6650 break; 6651 case 1: 6652 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL; 6653 break; 6654 case 2: 6655 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL; 6656 break; 6657 case 3: 6658 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL; 6659 break; 6660 default: 6661 DRM_DEBUG("invalid pipe %d\n", pipe); 6662 return; 6663 } 6664 } else { 6665 DRM_DEBUG("invalid me %d\n", me); 6666 return; 6667 } 6668 6669 switch (state) { 6670 case AMDGPU_IRQ_STATE_DISABLE: 6671 mec_int_cntl = RREG32(mec_int_cntl_reg); 6672 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; 6673 WREG32(mec_int_cntl_reg, mec_int_cntl); 6674 break; 6675 case AMDGPU_IRQ_STATE_ENABLE: 6676 mec_int_cntl = RREG32(mec_int_cntl_reg); 6677 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; 6678 WREG32(mec_int_cntl_reg, mec_int_cntl); 6679 break; 6680 default: 6681 break; 6682 } 6683 } 6684 6685 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6686 struct amdgpu_irq_src *source, 6687 unsigned type, 6688 enum amdgpu_interrupt_state state) 6689 { 6690 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, 6691 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6692 6693 return 0; 6694 } 6695 6696 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6697 struct amdgpu_irq_src *source, 6698 unsigned type, 6699 enum amdgpu_interrupt_state state) 6700 { 6701 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, 6702 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6703 6704 return 0; 6705 } 6706 6707 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6708 struct amdgpu_irq_src *src, 6709 unsigned type, 6710 enum amdgpu_interrupt_state state) 6711 { 6712 switch (type) { 6713 case AMDGPU_CP_IRQ_GFX_EOP: 6714 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state); 6715 break; 6716 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6717 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6718 break; 6719 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6720 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6721 break; 6722 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6723 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6724 break; 6725 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6726 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6727 break; 6728 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6729 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6730 break; 6731 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6732 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6733 break; 6734 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6735 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6736 break; 6737 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6738 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6739 break; 6740 default: 6741 break; 6742 } 6743 return 0; 6744 } 6745 6746 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, 6747 struct amdgpu_irq_src *source, 6748 struct amdgpu_iv_entry *entry) 6749 { 6750 int i; 6751 u8 me_id, pipe_id, queue_id; 6752 struct amdgpu_ring *ring; 6753 6754 DRM_DEBUG("IH: CP EOP\n"); 6755 me_id = (entry->ring_id & 0x0c) >> 2; 6756 pipe_id = (entry->ring_id & 0x03) >> 0; 6757 queue_id = (entry->ring_id & 0x70) >> 4; 6758 6759 switch (me_id) { 6760 case 0: 6761 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6762 break; 6763 case 1: 6764 case 2: 6765 for (i = 0; i < 
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting
			 * from VI, but the interrupt can only be enabled or
			 * disabled per pipe, not per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only supports GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
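
/*
 * Ring function tables.  emit_frame_size is the worst-case dword count a
 * single frame may occupy on the ring; the ring allocator uses it to
 * reserve space up front, and the per-entry comments break down the budget.
 */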
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if counting 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
			the first COND_EXEC jumps to the place just
			prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
};
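
/*
 * The KIQ (kernel interface queue) shares the compute rptr/wptr helpers but
 * uses its own fence path (a 32-bit seq write plus a CPC_INT_STATUS poke for
 * the interrupt) and additionally exposes emit_rreg/emit_wreg so registers
 * can be read and written through the ring, which the SR-IOV code relies on.
 */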
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC GDS info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}
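
/*
 * Build the active-CU bitmap for the currently selected SE/SH: OR the fuse
 * (CC) and user (GC_USER) INACTIVE_CUS fields, invert them, and mask the
 * result down to max_cu_per_sh bits.
 */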
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
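
/*
 * DE counterpart of gfx_v8_0_ring_emit_ce_meta above: stashes the GDS backup
 * address in the payload and writes it into the DE metadata slot of the CSA
 * (computed here as the last two pages of the reserved VA space) so a
 * preempted IB can later be resumed.
 */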
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}