/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)		((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)		((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)		((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)		((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)		((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)		((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)		((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14
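/*
 * Firmware images fetched at init time via request_firmware(); the
 * MODULE_FIRMWARE() entries below let userspace tooling (e.g. initramfs
 * generators) know which files under /lib/firmware this module may need.
 */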
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
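/*
 * The golden register tables below are consumed by
 * amdgpu_program_register_sequence() as {register, AND mask, OR value}
 * triples: the masked bits are cleared and the OR value is applied, an
 * all-ones mask writing the value outright.
 */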
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
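/*
 * The *_mgcg_cgcg_init tables seed the per-block CGTT clock-gating
 * controls and the RLC coarse-grain gating defaults for each ASIC
 * before clockgating is enabled.
 */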
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
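/*
 * gfx_v8_0_init_golden_registers - apply the per-ASIC "golden" register
 * settings above, i.e. validated values that differ from the hardware
 * reset defaults.
 */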
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* I2C fix-ups for specific Polaris10 boards, matched by PCI
		 * subsystem vendor/device IDs */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
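/*
 * gfx_v8_0_ring_test_ring - basic CP liveness check: write a magic value
 * to a scratch register through the ring and poll until the CP has
 * executed the write.
 */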
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
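/*
 * gfx_v8_0_ring_test_ib - same scratch-register handshake as the ring
 * test, but submitted through an indirect buffer and a fence so the
 * whole IB submission path is exercised.
 */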
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
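/*
 * gfx_v8_0_init_microcode - fetch and validate the CP (PFP/ME/CE/MEC) and
 * RLC microcode images for the current ASIC and record their version and
 * feature-version numbers.
 */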
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	/* on Polaris, try the new-style "_2" firmware image first and fall
	 * back to the original name if it is absent */
	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs is
	 * formally released with firmware feature version #46
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;
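	/* the RLC image also carries the register save/restore list data
	 * used for clock- and power-gating state handling; parsed below */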
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/* MEC2 firmware does not exist on Stoney or Topaz */
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the MEC jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}
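/*
 * gfx_v8_0_get_csb_buffer - emit the clear-state buffer (CSB): a PM4
 * preamble, bracketed by PREAMBLE_BEGIN/END_CLEAR_STATE, that programs
 * the context registers back to their default values.
 */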
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
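/*
 * cz_init_cp_jump_table - copy the jump table of each CP microcode engine
 * (CE, PFP, ME, MEC and, on Carrizo, MEC2) into the RLC-owned cp_table
 * buffer, which the RLC uses to manage the CP across power gating.
 */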
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
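/*
 * gfx_v8_0_mec_init - acquire the compute queues this driver will use and
 * allocate the buffer backing their per-queue HPD EOP storage, one
 * GFX8_MEC_HPD_SIZE slot per compute ring.
 */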
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
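/*
 * Hand-assembled GCN compute shaders for the Carrizo EDC workaround
 * below; they write every VGPR/SGPR so the register files are fully
 * initialized before EDC error counting is enabled.
 */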
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
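/*
 * gfx_v8_0_do_edc_gpr_workarounds - dispatch the GPR-init shaders above
 * on the first compute ring, then program GB_EDC_MODE/CC_GC_EDC_CONFIG
 * and read the SEC/DED counter registers back to clear them. Carrizo
 * only.
 */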
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);
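
	/*
	 * Size the IB from the packet layout used below: each register
	 * write is 3 dwords (SET_SH_REG header, register offset, value),
	 * the COMPUTE_PGM_LO/HI write is 4, DISPATCH_DIRECT is 5 and the
	 * EVENT_WRITE flush is 2, at 4 bytes per dword; the two shader
	 * blobs are then appended at 256-byte-aligned offsets.
	 */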
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}

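/*
 * Fill in the per-ASIC gfx topology and pick the GB_ADDR_CONFIG golden
 * value. Polaris parts read the shader-engine/CU layout from the vbios
 * via amdgpu_atombios_get_gfx_info(); the other ASICs use the hardcoded
 * values below.
 */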
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
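	/*
	 * Derive the DRAM row size: APUs decode the DIMM address-map
	 * fuses, while dGPUs compute 4 * 2^(8 + NOOFCOLS) bytes from
	 * MC_ARB_RAMCFG (e.g. NOOFCOLS = 0 gives 1 KB and NOOFCOLS = 2
	 * gives 4 KB, the clamp value).
	 */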
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}

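/*
 * Each compute ring gets its own doorbell (AMDGPU_DOORBELL_MEC_RING0 +
 * ring_id), a GFX8_MEC_HPD_SIZE slice of the shared HPD EOP buffer and
 * the EOP interrupt source selected by (me - 1) * num_pipe_per_mec +
 * pipe.
 */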
static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			+ (ring_id * GFX8_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}

static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

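	/*
	 * The KIQ (kernel interface queue) is set up after the regular
	 * compute rings; it is the queue the driver itself submits to,
	 * e.g. to map the compute queues' MQDs, which is assumed to be
	 * why it is also needed for the SR-IOV case noted below.
	 */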
	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}

static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);
	amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				      &adev->gfx.rlc.cp_table_gpu_addr,
				      (void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}

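/*
 * Program the GB_TILE_MODE and GB_MACROTILE_MODE register tables with the
 * per-ASIC surface tiling parameters. Indices skipped by the write loops
 * below (7, 12, 17 and 23 for the tile modes on Topaz, 7 for the
 * macrotile modes everywhere) are assumed reserved on those parts.
 */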
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

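		/* macrotile entries: bank width/height, macro tile aspect
		 * and bank count paired with the tile modes above
		 */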
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
(ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2952 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2953 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2954 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2955 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2956 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2957 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2958 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2959 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2960 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2961 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2962 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2963 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2964 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2965 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2966 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2967 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2968 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2969 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2970 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2971 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2972 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2973 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2974 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2975 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2976 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2977 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2978 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2979 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2980 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2981 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2982 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2983 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2984 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2985 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2986 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2987 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2988 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2989 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2990 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2991 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2992 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2993 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2994 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2995 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2996 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2997 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2998 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2999 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3000 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3001 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3002 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3003 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3004 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3005 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3006 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3007 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3008 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3009 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3010 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3011 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3012 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3013 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3014 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3015 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3016 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3017 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3018 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3019 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3020 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 
3021 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3022 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3023 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3024 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3025 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3026 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3027 3028 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3029 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3030 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3031 NUM_BANKS(ADDR_SURF_16_BANK)); 3032 3033 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3034 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3035 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3036 NUM_BANKS(ADDR_SURF_16_BANK)); 3037 3038 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3039 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3040 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3041 NUM_BANKS(ADDR_SURF_16_BANK)); 3042 3043 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3044 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3045 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3046 NUM_BANKS(ADDR_SURF_16_BANK)); 3047 3048 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3049 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3050 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3051 NUM_BANKS(ADDR_SURF_16_BANK)); 3052 3053 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3054 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3055 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3056 NUM_BANKS(ADDR_SURF_16_BANK)); 3057 3058 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3059 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3060 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3061 NUM_BANKS(ADDR_SURF_16_BANK)); 3062 3063 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3064 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3065 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3066 NUM_BANKS(ADDR_SURF_16_BANK)); 3067 3068 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3069 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3070 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3071 NUM_BANKS(ADDR_SURF_16_BANK)); 3072 3073 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3074 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3075 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3076 NUM_BANKS(ADDR_SURF_16_BANK)); 3077 3078 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3079 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3080 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3081 NUM_BANKS(ADDR_SURF_16_BANK)); 3082 3083 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3084 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3085 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3086 NUM_BANKS(ADDR_SURF_8_BANK)); 3087 3088 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3089 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3090 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3091 NUM_BANKS(ADDR_SURF_4_BANK)); 3092 3093 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3094 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3095 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3096 NUM_BANKS(ADDR_SURF_4_BANK)); 3097 3098 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3099 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3100 3101 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3102 if (reg_offset != 7) 3103 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3104 3105 break; 3106 case CHIP_STONEY: 3107 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3108 PIPE_CONFIG(ADDR_SURF_P2) | 3109 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3110 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3111 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3112 PIPE_CONFIG(ADDR_SURF_P2) | 3113 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3114 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3115 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3116 PIPE_CONFIG(ADDR_SURF_P2) | 3117 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3118 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3119 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3120 PIPE_CONFIG(ADDR_SURF_P2) | 3121 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3122 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3123 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3124 PIPE_CONFIG(ADDR_SURF_P2) | 3125 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3126 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3127 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3128 PIPE_CONFIG(ADDR_SURF_P2) | 3129 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3130 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3131 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3132 PIPE_CONFIG(ADDR_SURF_P2) | 3133 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3134 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3135 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3136 PIPE_CONFIG(ADDR_SURF_P2)); 3137 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3138 PIPE_CONFIG(ADDR_SURF_P2) | 3139 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3141 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3142 PIPE_CONFIG(ADDR_SURF_P2) | 3143 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3144 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3145 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3146 PIPE_CONFIG(ADDR_SURF_P2) | 3147 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3148 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3149 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3150 PIPE_CONFIG(ADDR_SURF_P2) | 3151 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3152 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3153 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3154 PIPE_CONFIG(ADDR_SURF_P2) | 3155 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3156 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3157 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3158 PIPE_CONFIG(ADDR_SURF_P2) | 3159 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3160 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3161 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3162 PIPE_CONFIG(ADDR_SURF_P2) | 3163 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3164 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3165 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3166 PIPE_CONFIG(ADDR_SURF_P2) | 3167 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3168 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3169 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3170 PIPE_CONFIG(ADDR_SURF_P2) | 3171 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3172 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3173 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3174 PIPE_CONFIG(ADDR_SURF_P2) | 3175 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3176 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3177 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3178 PIPE_CONFIG(ADDR_SURF_P2) | 3179 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3180 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3181 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3182 PIPE_CONFIG(ADDR_SURF_P2) | 3183 
MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3184 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3185 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3186 PIPE_CONFIG(ADDR_SURF_P2) | 3187 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3188 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3189 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3190 PIPE_CONFIG(ADDR_SURF_P2) | 3191 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3192 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3193 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3194 PIPE_CONFIG(ADDR_SURF_P2) | 3195 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3197 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3198 PIPE_CONFIG(ADDR_SURF_P2) | 3199 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3200 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3201 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3202 PIPE_CONFIG(ADDR_SURF_P2) | 3203 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3204 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3205 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3206 PIPE_CONFIG(ADDR_SURF_P2) | 3207 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3209 3210 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3211 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3212 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3213 NUM_BANKS(ADDR_SURF_8_BANK)); 3214 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3215 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3216 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3217 NUM_BANKS(ADDR_SURF_8_BANK)); 3218 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3219 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3220 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3221 NUM_BANKS(ADDR_SURF_8_BANK)); 3222 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3223 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3224 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3225 NUM_BANKS(ADDR_SURF_8_BANK)); 3226 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3227 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3228 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3229 NUM_BANKS(ADDR_SURF_8_BANK)); 3230 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3231 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3232 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3233 NUM_BANKS(ADDR_SURF_8_BANK)); 3234 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3235 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3236 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3237 NUM_BANKS(ADDR_SURF_8_BANK)); 3238 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3239 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3240 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3241 NUM_BANKS(ADDR_SURF_16_BANK)); 3242 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3245 NUM_BANKS(ADDR_SURF_16_BANK)); 3246 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3247 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3248 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3249 NUM_BANKS(ADDR_SURF_16_BANK)); 3250 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3251 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3252 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3253 NUM_BANKS(ADDR_SURF_16_BANK)); 3254 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3257 NUM_BANKS(ADDR_SURF_16_BANK)); 3258 mod2array[13] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3261 NUM_BANKS(ADDR_SURF_16_BANK)); 3262 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3263 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3264 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3265 NUM_BANKS(ADDR_SURF_8_BANK)); 3266 3267 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3268 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3269 reg_offset != 23) 3270 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3271 3272 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3273 if (reg_offset != 7) 3274 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3275 3276 break; 3277 default: 3278 dev_warn(adev->dev, 3279 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3280 adev->asic_type); 3281 3282 case CHIP_CARRIZO: 3283 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3284 PIPE_CONFIG(ADDR_SURF_P2) | 3285 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3286 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3287 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3288 PIPE_CONFIG(ADDR_SURF_P2) | 3289 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3290 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3291 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3292 PIPE_CONFIG(ADDR_SURF_P2) | 3293 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3294 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3295 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3296 PIPE_CONFIG(ADDR_SURF_P2) | 3297 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3298 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3299 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3300 PIPE_CONFIG(ADDR_SURF_P2) | 3301 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3302 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3303 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3304 PIPE_CONFIG(ADDR_SURF_P2) | 3305 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3306 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3307 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3308 PIPE_CONFIG(ADDR_SURF_P2) | 3309 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3310 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3311 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3312 PIPE_CONFIG(ADDR_SURF_P2)); 3313 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3314 PIPE_CONFIG(ADDR_SURF_P2) | 3315 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3317 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3318 PIPE_CONFIG(ADDR_SURF_P2) | 3319 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3321 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3322 PIPE_CONFIG(ADDR_SURF_P2) | 3323 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3325 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3326 PIPE_CONFIG(ADDR_SURF_P2) | 3327 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3329 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3330 PIPE_CONFIG(ADDR_SURF_P2) | 3331 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3333 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3334 PIPE_CONFIG(ADDR_SURF_P2) | 3335 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3336 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3337 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3338 PIPE_CONFIG(ADDR_SURF_P2) | 3339 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3341 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3342 PIPE_CONFIG(ADDR_SURF_P2) | 3343 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3345 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3346 PIPE_CONFIG(ADDR_SURF_P2) | 3347 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3349 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3350 PIPE_CONFIG(ADDR_SURF_P2) | 3351 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3353 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3354 PIPE_CONFIG(ADDR_SURF_P2) | 3355 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3357 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3358 PIPE_CONFIG(ADDR_SURF_P2) | 3359 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3361 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3362 PIPE_CONFIG(ADDR_SURF_P2) | 3363 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3365 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3366 PIPE_CONFIG(ADDR_SURF_P2) | 3367 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3369 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3370 PIPE_CONFIG(ADDR_SURF_P2) | 3371 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3373 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3374 PIPE_CONFIG(ADDR_SURF_P2) | 3375 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3376 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3377 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3378 PIPE_CONFIG(ADDR_SURF_P2) | 3379 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3380 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3381 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3382 PIPE_CONFIG(ADDR_SURF_P2) | 3383 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3385 3386 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3387 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3388 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3389 NUM_BANKS(ADDR_SURF_8_BANK)); 3390 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3391 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3392 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3393 NUM_BANKS(ADDR_SURF_8_BANK)); 3394 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3395 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3396 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3397 NUM_BANKS(ADDR_SURF_8_BANK)); 3398 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3399 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3400 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3401 NUM_BANKS(ADDR_SURF_8_BANK)); 3402 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3403 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3404 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3405 NUM_BANKS(ADDR_SURF_8_BANK)); 3406 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3407 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3408 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3409 NUM_BANKS(ADDR_SURF_8_BANK)); 3410 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) 
| 3411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3413 NUM_BANKS(ADDR_SURF_8_BANK)); 3414 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3417 NUM_BANKS(ADDR_SURF_16_BANK)); 3418 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3419 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3420 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3421 NUM_BANKS(ADDR_SURF_16_BANK)); 3422 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3423 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3424 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3425 NUM_BANKS(ADDR_SURF_16_BANK)); 3426 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3427 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3428 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3429 NUM_BANKS(ADDR_SURF_16_BANK)); 3430 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3431 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3432 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3433 NUM_BANKS(ADDR_SURF_16_BANK)); 3434 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3435 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3436 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3437 NUM_BANKS(ADDR_SURF_16_BANK)); 3438 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3439 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3440 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3441 NUM_BANKS(ADDR_SURF_8_BANK)); 3442 3443 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3444 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3445 reg_offset != 23) 3446 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3447 3448 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3449 if (reg_offset != 7) 3450 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3451 3452 break; 3453 } 3454 } 3455 3456 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, 3457 u32 se_num, u32 sh_num, u32 instance) 3458 { 3459 u32 data; 3460 3461 if (instance == 0xffffffff) 3462 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3463 else 3464 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 3465 3466 if (se_num == 0xffffffff) 3467 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3468 else 3469 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3470 3471 if (sh_num == 0xffffffff) 3472 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3473 else 3474 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3475 3476 WREG32(mmGRBM_GFX_INDEX, data); 3477 } 3478 3479 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3480 { 3481 u32 data, mask; 3482 3483 data = RREG32(mmCC_RB_BACKEND_DISABLE) | 3484 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3485 3486 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); 3487 3488 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 3489 adev->gfx.config.max_sh_per_se); 3490 3491 return (~data) & mask; 3492 } 3493 3494 static void 3495 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) 3496 { 3497 switch (adev->asic_type) { 3498 case CHIP_FIJI: 3499 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3500 RB_XSEL2(1) | PKR_MAP(2) | 3501 PKR_XSEL(1) | PKR_YSEL(1) | 3502 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); 3503 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | 3504 SE_PAIR_YSEL(2); 3505 break; 3506 case CHIP_TONGA: 3507 case 
CHIP_POLARIS10: 3508 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3509 SE_XSEL(1) | SE_YSEL(1); 3510 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | 3511 SE_PAIR_YSEL(2); 3512 break; 3513 case CHIP_TOPAZ: 3514 case CHIP_CARRIZO: 3515 *rconf |= RB_MAP_PKR0(2); 3516 *rconf1 |= 0x0; 3517 break; 3518 case CHIP_POLARIS11: 3519 case CHIP_POLARIS12: 3520 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3521 SE_XSEL(1) | SE_YSEL(1); 3522 *rconf1 |= 0x0; 3523 break; 3524 case CHIP_STONEY: 3525 *rconf |= 0x0; 3526 *rconf1 |= 0x0; 3527 break; 3528 default: 3529 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 3530 break; 3531 } 3532 } 3533 3534 static void 3535 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, 3536 u32 raster_config, u32 raster_config_1, 3537 unsigned rb_mask, unsigned num_rb) 3538 { 3539 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); 3540 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); 3541 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); 3542 unsigned rb_per_se = num_rb / num_se; 3543 unsigned se_mask[4]; 3544 unsigned se; 3545 3546 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3547 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3548 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 3549 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3550 3551 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); 3552 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); 3553 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); 3554 3555 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3556 (!se_mask[2] && !se_mask[3]))) { 3557 raster_config_1 &= ~SE_PAIR_MAP_MASK; 3558 3559 if (!se_mask[0] && !se_mask[1]) { 3560 raster_config_1 |= 3561 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); 3562 } else { 3563 raster_config_1 |= 3564 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); 3565 } 3566 } 3567 3568 for (se = 0; se < num_se; se++) { 3569 unsigned raster_config_se = raster_config; 3570 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3571 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3572 int idx = (se / 2) * 2; 3573 3574 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3575 raster_config_se &= ~SE_MAP_MASK; 3576 3577 if (!se_mask[idx]) { 3578 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); 3579 } else { 3580 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); 3581 } 3582 } 3583 3584 pkr0_mask &= rb_mask; 3585 pkr1_mask &= rb_mask; 3586 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3587 raster_config_se &= ~PKR_MAP_MASK; 3588 3589 if (!pkr0_mask) { 3590 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); 3591 } else { 3592 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); 3593 } 3594 } 3595 3596 if (rb_per_se >= 2) { 3597 unsigned rb0_mask = 1 << (se * rb_per_se); 3598 unsigned rb1_mask = rb0_mask << 1; 3599 3600 rb0_mask &= rb_mask; 3601 rb1_mask &= rb_mask; 3602 if (!rb0_mask || !rb1_mask) { 3603 raster_config_se &= ~RB_MAP_PKR0_MASK; 3604 3605 if (!rb0_mask) { 3606 raster_config_se |= 3607 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); 3608 } else { 3609 raster_config_se |= 3610 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); 3611 } 3612 } 3613 3614 if (rb_per_se > 2) { 3615 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3616 rb1_mask = rb0_mask << 1; 3617 rb0_mask &= rb_mask; 3618 rb1_mask &= rb_mask; 3619 if (!rb0_mask || !rb1_mask) { 3620 raster_config_se &= ~RB_MAP_PKR1_MASK; 3621 3622 if (!rb0_mask) { 3623 raster_config_se |= 3624 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); 3625 } else { 
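					/*
					 * rb1 of this packer is harvested while rb0 is
					 * still active, so fall back to rb0 (annotation
					 * added; inferred from the mask checks above).
					 */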
					raster_config_se |=
						RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}

static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

/**
 * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
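	/*
	 * Annotation added (intent inferred from the field names): the
	 * compute VMIDs get 64-bit HSA addressing, unaligned access
	 * support, a cache-coherent default mtype and the private ATC
	 * path, matching the HSA aperture layout described above.
	 */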
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	default:
		adev->gfx.config.double_offchip_lds_buf = 1;
		break;
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		adev->gfx.config.double_offchip_lds_buf = 0;
		break;
	}
}

static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcast
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
(adev->gfx.config.sc_earlyz_tile_fifo_size << 3827 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); 3828 3829 tmp = RREG32(mmSPI_ARB_PRIORITY); 3830 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); 3831 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); 3832 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); 3833 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2); 3834 WREG32(mmSPI_ARB_PRIORITY, tmp); 3835 3836 mutex_unlock(&adev->grbm_idx_mutex); 3837 3838 } 3839 3840 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 3841 { 3842 u32 i, j, k; 3843 u32 mask; 3844 3845 mutex_lock(&adev->grbm_idx_mutex); 3846 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3847 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3848 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3849 for (k = 0; k < adev->usec_timeout; k++) { 3850 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) 3851 break; 3852 udelay(1); 3853 } 3854 } 3855 } 3856 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3857 mutex_unlock(&adev->grbm_idx_mutex); 3858 3859 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 3860 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 3861 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 3862 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 3863 for (k = 0; k < adev->usec_timeout; k++) { 3864 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 3865 break; 3866 udelay(1); 3867 } 3868 } 3869 3870 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 3871 bool enable) 3872 { 3873 u32 tmp = RREG32(mmCP_INT_CNTL_RING0); 3874 3875 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 3876 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 3877 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 3878 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 
							  1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}

static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}

static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}

static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
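	/*
	 * Annotation added (inferred from the masks below): each non-zero
	 * unique entry packs a register offset in its low 18 bits and the
	 * associated data in the bits above 20; the offset goes into the
	 * i-th RLC_SRM_INDEX_CNTL_ADDR register and the data into the
	 * paired RLC_SRM_INDEX_CNTL_DATA register.
	 */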
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}

static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}

static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}

static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}

static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}

}

static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* APUs such as carrizo enable the CP interrupt only after the CP is initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}

static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
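	/*
	 * Annotation added (describes the sequence below): the legacy load
	 * streams the image through an auto-incrementing register pair -
	 * reset the write pointer by writing 0 to mmRLC_GPM_UCODE_ADDR,
	 * push each dword through mmRLC_GPM_UCODE_DATA, then write the
	 * firmware version back to mmRLC_GPM_UCODE_ADDR.
	 */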
amdgpu_ucode_print_rlc_hdr(&hdr->header); 4106 4107 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 4108 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 4109 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 4110 4111 WREG32(mmRLC_GPM_UCODE_ADDR, 0); 4112 for (i = 0; i < fw_size; i++) 4113 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 4114 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 4115 4116 return 0; 4117 } 4118 4119 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) 4120 { 4121 int r; 4122 u32 tmp; 4123 4124 gfx_v8_0_rlc_stop(adev); 4125 4126 /* disable CG */ 4127 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL); 4128 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 4129 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4130 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); 4131 if (adev->asic_type == CHIP_POLARIS11 || 4132 adev->asic_type == CHIP_POLARIS10 || 4133 adev->asic_type == CHIP_POLARIS12) { 4134 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D); 4135 tmp &= ~0x3; 4136 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp); 4137 } 4138 4139 /* disable PG */ 4140 WREG32(mmRLC_PG_CNTL, 0); 4141 4142 gfx_v8_0_rlc_reset(adev); 4143 gfx_v8_0_init_pg(adev); 4144 4145 4146 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4147 /* legacy rlc firmware loading */ 4148 r = gfx_v8_0_rlc_load_microcode(adev); 4149 if (r) 4150 return r; 4151 } 4152 4153 gfx_v8_0_rlc_start(adev); 4154 4155 return 0; 4156 } 4157 4158 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 4159 { 4160 int i; 4161 u32 tmp = RREG32(mmCP_ME_CNTL); 4162 4163 if (enable) { 4164 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0); 4165 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0); 4166 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0); 4167 } else { 4168 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); 4169 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); 4170 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); 4171 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 4172 adev->gfx.gfx_ring[i].ready = false; 4173 } 4174 WREG32(mmCP_ME_CNTL, tmp); 4175 udelay(50); 4176 } 4177 4178 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 4179 { 4180 const struct gfx_firmware_header_v1_0 *pfp_hdr; 4181 const struct gfx_firmware_header_v1_0 *ce_hdr; 4182 const struct gfx_firmware_header_v1_0 *me_hdr; 4183 const __le32 *fw_data; 4184 unsigned i, fw_size; 4185 4186 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 4187 return -EINVAL; 4188 4189 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 4190 adev->gfx.pfp_fw->data; 4191 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 4192 adev->gfx.ce_fw->data; 4193 me_hdr = (const struct gfx_firmware_header_v1_0 *) 4194 adev->gfx.me_fw->data; 4195 4196 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 4197 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 4198 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 4199 4200 gfx_v8_0_cp_gfx_enable(adev, false); 4201 4202 /* PFP */ 4203 fw_data = (const __le32 *) 4204 (adev->gfx.pfp_fw->data + 4205 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 4206 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 4207 WREG32(mmCP_PFP_UCODE_ADDR, 0); 4208 for (i = 0; i < fw_size; i++) 4209 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 4210 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 4211 4212 /* CE */ 4213 fw_data = (const __le32 *) 4214 (adev->gfx.ce_fw->data + 4215 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 4216 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 4217 
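	/* the CE uses the same reset-address/stream-dwords/write-version
	 * sequence as the PFP above; fw_size is in dwords (annotation added)
	 */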
WREG32(mmCP_CE_UCODE_ADDR, 0); 4218 for (i = 0; i < fw_size; i++) 4219 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 4220 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 4221 4222 /* ME */ 4223 fw_data = (const __le32 *) 4224 (adev->gfx.me_fw->data + 4225 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 4226 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 4227 WREG32(mmCP_ME_RAM_WADDR, 0); 4228 for (i = 0; i < fw_size; i++) 4229 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 4230 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 4231 4232 return 0; 4233 } 4234 4235 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev) 4236 { 4237 u32 count = 0; 4238 const struct cs_section_def *sect = NULL; 4239 const struct cs_extent_def *ext = NULL; 4240 4241 /* begin clear state */ 4242 count += 2; 4243 /* context control state */ 4244 count += 3; 4245 4246 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4247 for (ext = sect->section; ext->extent != NULL; ++ext) { 4248 if (sect->id == SECT_CONTEXT) 4249 count += 2 + ext->reg_count; 4250 else 4251 return 0; 4252 } 4253 } 4254 /* pa_sc_raster_config/pa_sc_raster_config1 */ 4255 count += 4; 4256 /* end clear state */ 4257 count += 2; 4258 /* clear state */ 4259 count += 2; 4260 4261 return count; 4262 } 4263 4264 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) 4265 { 4266 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 4267 const struct cs_section_def *sect = NULL; 4268 const struct cs_extent_def *ext = NULL; 4269 int r, i; 4270 4271 /* init the CP */ 4272 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 4273 WREG32(mmCP_ENDIAN_SWAP, 0); 4274 WREG32(mmCP_DEVICE_ID, 1); 4275 4276 gfx_v8_0_cp_gfx_enable(adev, true); 4277 4278 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); 4279 if (r) { 4280 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 4281 return r; 4282 } 4283 4284 /* clear state buffer */ 4285 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4286 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 4287 4288 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4289 amdgpu_ring_write(ring, 0x80000000); 4290 amdgpu_ring_write(ring, 0x80000000); 4291 4292 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4293 for (ext = sect->section; ext->extent != NULL; ++ext) { 4294 if (sect->id == SECT_CONTEXT) { 4295 amdgpu_ring_write(ring, 4296 PACKET3(PACKET3_SET_CONTEXT_REG, 4297 ext->reg_count)); 4298 amdgpu_ring_write(ring, 4299 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 4300 for (i = 0; i < ext->reg_count; i++) 4301 amdgpu_ring_write(ring, ext->extent[i]); 4302 } 4303 } 4304 } 4305 4306 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 4307 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 4308 switch (adev->asic_type) { 4309 case CHIP_TONGA: 4310 case CHIP_POLARIS10: 4311 amdgpu_ring_write(ring, 0x16000012); 4312 amdgpu_ring_write(ring, 0x0000002A); 4313 break; 4314 case CHIP_POLARIS11: 4315 case CHIP_POLARIS12: 4316 amdgpu_ring_write(ring, 0x16000012); 4317 amdgpu_ring_write(ring, 0x00000000); 4318 break; 4319 case CHIP_FIJI: 4320 amdgpu_ring_write(ring, 0x3a00161a); 4321 amdgpu_ring_write(ring, 0x0000002e); 4322 break; 4323 case CHIP_CARRIZO: 4324 amdgpu_ring_write(ring, 0x00000002); 4325 amdgpu_ring_write(ring, 0x00000000); 4326 break; 4327 case CHIP_TOPAZ: 4328 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ? 
					  0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			DOORBELL_RANGE_LOWER,
			AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}

static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev,
ring); 4437 /* start the ring */ 4438 amdgpu_ring_clear_ring(ring); 4439 gfx_v8_0_cp_gfx_start(adev); 4440 ring->ready = true; 4441 r = amdgpu_ring_test_ring(ring); 4442 if (r) 4443 ring->ready = false; 4444 4445 return r; 4446 } 4447 4448 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 4449 { 4450 int i; 4451 4452 if (enable) { 4453 WREG32(mmCP_MEC_CNTL, 0); 4454 } else { 4455 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 4456 for (i = 0; i < adev->gfx.num_compute_rings; i++) 4457 adev->gfx.compute_ring[i].ready = false; 4458 adev->gfx.kiq.ring.ready = false; 4459 } 4460 udelay(50); 4461 } 4462 4463 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 4464 { 4465 const struct gfx_firmware_header_v1_0 *mec_hdr; 4466 const __le32 *fw_data; 4467 unsigned i, fw_size; 4468 4469 if (!adev->gfx.mec_fw) 4470 return -EINVAL; 4471 4472 gfx_v8_0_cp_compute_enable(adev, false); 4473 4474 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4475 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4476 4477 fw_data = (const __le32 *) 4478 (adev->gfx.mec_fw->data + 4479 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4480 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4481 4482 /* MEC1 */ 4483 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4484 for (i = 0; i < fw_size; i++) 4485 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4486 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4487 4488 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 4489 if (adev->gfx.mec2_fw) { 4490 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4491 4492 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4493 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4494 4495 fw_data = (const __le32 *) 4496 (adev->gfx.mec2_fw->data + 4497 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4498 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4499 4500 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4501 for (i = 0; i < fw_size; i++) 4502 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4503 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4504 } 4505 4506 return 0; 4507 } 4508 4509 /* KIQ functions */ 4510 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) 4511 { 4512 uint32_t tmp; 4513 struct amdgpu_device *adev = ring->adev; 4514 4515 /* tell RLC which is KIQ queue */ 4516 tmp = RREG32(mmRLC_CP_SCHEDULERS); 4517 tmp &= 0xffffff00; 4518 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4519 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4520 tmp |= 0x80; 4521 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4522 } 4523 4524 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) 4525 { 4526 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 4527 uint32_t scratch, tmp = 0; 4528 uint64_t queue_mask = 0; 4529 int r, i; 4530 4531 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 4532 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 4533 continue; 4534 4535 /* This situation may be hit in the future if a new HW 4536 * generation exposes more than 64 queues. 
If so, the 4537 * definition of queue_mask needs updating */ 4538 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 4539 DRM_ERROR("Invalid KCQ index: %d\n", i); 4540 break; 4541 } 4542 4543 queue_mask |= (1ull << i); 4544 } 4545 4546 r = amdgpu_gfx_scratch_get(adev, &scratch); 4547 if (r) { 4548 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 4549 return r; 4550 } 4551 WREG32(scratch, 0xCAFEDEAD); 4552 4553 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); 4554 if (r) { 4555 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 4556 amdgpu_gfx_scratch_free(adev, scratch); 4557 return r; 4558 } 4559 /* set resources */ 4560 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 4561 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask: 0, queue_type: 0 (KIQ) */ 4562 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 4563 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 4564 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 4565 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 4566 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 4567 amdgpu_ring_write(kiq_ring, 0); /* gds heap base: 0, gds heap size: 0 */ 4568 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4569 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4570 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 4571 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4572 4573 /* map queues */ 4574 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 4575 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */ 4576 amdgpu_ring_write(kiq_ring, 4577 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); 4578 amdgpu_ring_write(kiq_ring, 4579 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | 4580 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 4581 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 4582 PACKET3_MAP_QUEUES_ME(ring->me == 1 ?
0 : 1)); /* doorbell */ 4583 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 4584 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 4585 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 4586 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4587 } 4588 /* write to scratch for completion */ 4589 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 4590 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 4591 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 4592 amdgpu_ring_commit(kiq_ring); 4593 4594 for (i = 0; i < adev->usec_timeout; i++) { 4595 tmp = RREG32(scratch); 4596 if (tmp == 0xDEADBEEF) 4597 break; 4598 DRM_UDELAY(1); 4599 } 4600 if (i >= adev->usec_timeout) { 4601 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", 4602 scratch, tmp); 4603 r = -EINVAL; 4604 } 4605 amdgpu_gfx_scratch_free(adev, scratch); 4606 4607 return r; 4608 } 4609 4610 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) 4611 { 4612 int i, r = 0; 4613 4614 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 4615 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); 4616 for (i = 0; i < adev->usec_timeout; i++) { 4617 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 4618 break; 4619 udelay(1); 4620 } 4621 if (i == adev->usec_timeout) 4622 r = -ETIMEDOUT; 4623 } 4624 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 4625 WREG32(mmCP_HQD_PQ_RPTR, 0); 4626 WREG32(mmCP_HQD_PQ_WPTR, 0); 4627 4628 return r; 4629 } 4630 4631 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) 4632 { 4633 struct amdgpu_device *adev = ring->adev; 4634 struct vi_mqd *mqd = ring->mqd_ptr; 4635 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4636 uint32_t tmp; 4637 4638 mqd->header = 0xC0310800; 4639 mqd->compute_pipelinestat_enable = 0x00000001; 4640 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4641 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4642 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4643 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4644 mqd->compute_misc_reserved = 0x00000003; 4645 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr 4646 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4647 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr 4648 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4649 eop_base_addr = ring->eop_gpu_addr >> 8; 4650 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4651 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4652 4653 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4654 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4655 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4656 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); 4657 4658 mqd->cp_hqd_eop_control = tmp; 4659 4660 /* enable doorbell? */ 4661 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), 4662 CP_HQD_PQ_DOORBELL_CONTROL, 4663 DOORBELL_EN, 4664 ring->use_doorbell ? 
1 : 0); 4665 4666 mqd->cp_hqd_pq_doorbell_control = tmp; 4667 4668 /* set the pointer to the MQD */ 4669 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 4670 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 4671 4672 /* set MQD vmid to 0 */ 4673 tmp = RREG32(mmCP_MQD_CONTROL); 4674 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4675 mqd->cp_mqd_control = tmp; 4676 4677 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */ 4678 hqd_gpu_addr = ring->gpu_addr >> 8; 4679 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4680 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4681 4682 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4683 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4684 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4685 (order_base_2(ring->ring_size / 4) - 1)); 4686 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4687 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4688 #ifdef __BIG_ENDIAN 4689 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4690 #endif 4691 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4692 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4693 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4694 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4695 mqd->cp_hqd_pq_control = tmp; 4696 4697 /* set the wb address whether it's enabled or not */ 4698 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4699 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4700 mqd->cp_hqd_pq_rptr_report_addr_hi = 4701 upper_32_bits(wb_gpu_addr) & 0xffff; 4702 4703 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4704 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4705 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4706 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4707 4708 tmp = 0; 4709 /* enable the doorbell if requested */ 4710 if (ring->use_doorbell) { 4711 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4712 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4713 DOORBELL_OFFSET, ring->doorbell_index); 4714 4715 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4716 DOORBELL_EN, 1); 4717 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4718 DOORBELL_SOURCE, 0); 4719 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4720 DOORBELL_HIT, 0); 4721 } 4722 4723 mqd->cp_hqd_pq_doorbell_control = tmp; 4724 4725 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4726 ring->wptr = 0; 4727 mqd->cp_hqd_pq_wptr = ring->wptr; 4728 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4729 4730 /* set the vmid for the queue */ 4731 mqd->cp_hqd_vmid = 0; 4732 4733 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4734 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4735 mqd->cp_hqd_persistent_state = tmp; 4736 4737 /* set MTYPE */ 4738 tmp = RREG32(mmCP_HQD_IB_CONTROL); 4739 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4740 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); 4741 mqd->cp_hqd_ib_control = tmp; 4742 4743 tmp = RREG32(mmCP_HQD_IQ_TIMER); 4744 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); 4745 mqd->cp_hqd_iq_timer = tmp; 4746 4747 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); 4748 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); 4749 mqd->cp_hqd_ctx_save_control = tmp; 4750 4751 /* defaults */ 4752 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); 4753 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
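/* Note: gfx_v8_0_mqd_init() runs with this queue's SRBM aperture selected (the callers in this file hold srbm_mutex and call vi_srbm_select() first), so the RREG32 "defaults" above and below snapshot the per-queue HQD state rather than another queue's registers. */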
4754 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); 4755 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); 4756 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); 4757 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); 4758 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); 4759 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); 4760 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); 4761 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); 4762 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); 4763 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); 4764 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); 4765 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); 4766 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); 4767 4768 /* activate the queue */ 4769 mqd->cp_hqd_active = 1; 4770 4771 return 0; 4772 } 4773 4774 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 4775 struct vi_mqd *mqd) 4776 { 4777 uint32_t mqd_reg; 4778 uint32_t *mqd_data; 4779 4780 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ 4781 mqd_data = &mqd->cp_mqd_base_addr_lo; 4782 4783 /* disable wptr polling */ 4784 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); 4785 4786 /* program all HQD registers */ 4787 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++) 4788 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4789 4790 /* Tonga errata: EOP RPTR/WPTR should be left unmodified. 4791 * This is safe since EOP RPTR==WPTR for any inactive HQD 4792 * on ASICs that do not support context-save. 4793 * EOP writes/reads can start anywhere in the ring. 4794 */ 4795 if (adev->asic_type != CHIP_TONGA) { 4796 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); 4797 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); 4798 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); 4799 } 4800 4801 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) 4802 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4803 4804 /* activate the HQD */ 4805 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) 4806 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4807 4808 return 0; 4809 } 4810 4811 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) 4812 { 4813 struct amdgpu_device *adev = ring->adev; 4814 struct vi_mqd *mqd = ring->mqd_ptr; 4815 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 4816 4817 gfx_v8_0_kiq_setting(ring); 4818 4819 if (adev->in_sriov_reset) { /* for GPU_RESET case */ 4820 /* reset MQD to a clean status */ 4821 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4822 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4823 4824 /* reset ring buffer */ 4825 ring->wptr = 0; 4826 amdgpu_ring_clear_ring(ring); 4827 mutex_lock(&adev->srbm_mutex); 4828 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4829 gfx_v8_0_mqd_commit(adev, mqd); 4830 vi_srbm_select(adev, 0, 0, 0, 0); 4831 mutex_unlock(&adev->srbm_mutex); 4832 } else { 4833 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4834 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4835 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4836 mutex_lock(&adev->srbm_mutex); 4837 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4838 gfx_v8_0_mqd_init(ring); 4839 gfx_v8_0_mqd_commit(adev, mqd); 4840 vi_srbm_select(adev, 0, 0, 0, 0); 4841 mutex_unlock(&adev->srbm_mutex); 4842 4843 if 
(adev->gfx.mec.mqd_backup[mqd_idx]) 4844 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4845 } 4846 4847 return 0; 4848 } 4849 4850 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) 4851 { 4852 struct amdgpu_device *adev = ring->adev; 4853 struct vi_mqd *mqd = ring->mqd_ptr; 4854 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4855 4856 if (!adev->in_sriov_reset && !adev->gfx.in_suspend) { 4857 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4858 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4859 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4860 mutex_lock(&adev->srbm_mutex); 4861 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4862 gfx_v8_0_mqd_init(ring); 4863 vi_srbm_select(adev, 0, 0, 0, 0); 4864 mutex_unlock(&adev->srbm_mutex); 4865 4866 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4867 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4868 } else if (adev->in_sriov_reset) { /* for GPU_RESET case */ 4869 /* reset MQD to a clean status */ 4870 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4871 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4872 /* reset ring buffer */ 4873 ring->wptr = 0; 4874 amdgpu_ring_clear_ring(ring); 4875 } else { 4876 amdgpu_ring_clear_ring(ring); 4877 } 4878 return 0; 4879 } 4880 4881 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) 4882 { 4883 if (adev->asic_type > CHIP_TONGA) { 4884 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); 4885 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); 4886 } 4887 /* enable doorbells */ 4888 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4889 } 4890 4891 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) 4892 { 4893 struct amdgpu_ring *ring = NULL; 4894 int r = 0, i; 4895 4896 gfx_v8_0_cp_compute_enable(adev, true); 4897 4898 ring = &adev->gfx.kiq.ring; 4899 4900 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4901 if (unlikely(r != 0)) 4902 goto done; 4903 4904 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4905 if (!r) { 4906 r = gfx_v8_0_kiq_init_queue(ring); 4907 amdgpu_bo_kunmap(ring->mqd_obj); 4908 ring->mqd_ptr = NULL; 4909 } 4910 amdgpu_bo_unreserve(ring->mqd_obj); 4911 if (r) 4912 goto done; 4913 4914 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4915 ring = &adev->gfx.compute_ring[i]; 4916 4917 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4918 if (unlikely(r != 0)) 4919 goto done; 4920 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4921 if (!r) { 4922 r = gfx_v8_0_kcq_init_queue(ring); 4923 amdgpu_bo_kunmap(ring->mqd_obj); 4924 ring->mqd_ptr = NULL; 4925 } 4926 amdgpu_bo_unreserve(ring->mqd_obj); 4927 if (r) 4928 goto done; 4929 } 4930 4931 gfx_v8_0_set_mec_doorbell_range(adev); 4932 4933 r = gfx_v8_0_kiq_kcq_enable(adev); 4934 if (r) 4935 goto done; 4936 4937 /* Test KIQ */ 4938 ring = &adev->gfx.kiq.ring; 4939 ring->ready = true; 4940 r = amdgpu_ring_test_ring(ring); 4941 if (r) { 4942 ring->ready = false; 4943 goto done; 4944 } 4945 4946 /* Test KCQs */ 4947 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4948 ring = &adev->gfx.compute_ring[i]; 4949 ring->ready = true; 4950 r = amdgpu_ring_test_ring(ring); 4951 if (r) 4952 ring->ready = false; 4953 } 4954 4955 done: 4956 return r; 4957 } 4958 4959 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 4960 { 4961 int r; 4962 4963 if (!(adev->flags & AMD_IS_APU)) 4964 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4965 
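/* With AMDGPU_FW_LOAD_DIRECT the driver streams the CP/MEC microcode through the *_UCODE_ADDR/_UCODE_DATA registers itself (see gfx_v8_0_cp_compute_load_microcode() above); for the other load types the SMU is expected to have loaded the microcode already, so the legacy path below is skipped. */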
4966 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4967 /* legacy firmware loading */ 4968 r = gfx_v8_0_cp_gfx_load_microcode(adev); 4969 if (r) 4970 return r; 4971 4972 r = gfx_v8_0_cp_compute_load_microcode(adev); 4973 if (r) 4974 return r; 4975 } 4976 4977 r = gfx_v8_0_cp_gfx_resume(adev); 4978 if (r) 4979 return r; 4980 4981 r = gfx_v8_0_kiq_resume(adev); 4982 if (r) 4983 return r; 4984 4985 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 4986 4987 return 0; 4988 } 4989 4990 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 4991 { 4992 gfx_v8_0_cp_gfx_enable(adev, enable); 4993 gfx_v8_0_cp_compute_enable(adev, enable); 4994 } 4995 4996 static int gfx_v8_0_hw_init(void *handle) 4997 { 4998 int r; 4999 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5000 5001 gfx_v8_0_init_golden_registers(adev); 5002 gfx_v8_0_gpu_init(adev); 5003 5004 r = gfx_v8_0_rlc_resume(adev); 5005 if (r) 5006 return r; 5007 5008 r = gfx_v8_0_cp_resume(adev); 5009 5010 return r; 5011 } 5012 5013 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring) 5014 { 5015 struct amdgpu_device *adev = kiq_ring->adev; 5016 uint32_t scratch, tmp = 0; 5017 int r, i; 5018 5019 r = amdgpu_gfx_scratch_get(adev, &scratch); 5020 if (r) { 5021 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 5022 return r; 5023 } 5024 WREG32(scratch, 0xCAFEDEAD); 5025 5026 r = amdgpu_ring_alloc(kiq_ring, 10); 5027 if (r) { 5028 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 5029 amdgpu_gfx_scratch_free(adev, scratch); 5030 return r; 5031 } 5032 5033 /* unmap queues */ 5034 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 5035 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 5036 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 5037 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 5038 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 5039 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 5040 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 5041 amdgpu_ring_write(kiq_ring, 0); 5042 amdgpu_ring_write(kiq_ring, 0); 5043 amdgpu_ring_write(kiq_ring, 0); 5044 /* write to scratch for completion */ 5045 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 5046 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 5047 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 5048 amdgpu_ring_commit(kiq_ring); 5049 5050 for (i = 0; i < adev->usec_timeout; i++) { 5051 tmp = RREG32(scratch); 5052 if (tmp == 0xDEADBEEF) 5053 break; 5054 DRM_UDELAY(1); 5055 } 5056 if (i >= adev->usec_timeout) { 5057 DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); 5058 r = -EINVAL; 5059 } 5060 amdgpu_gfx_scratch_free(adev, scratch); 5061 return r; 5062 } 5063 5064 static int gfx_v8_0_hw_fini(void *handle) 5065 { 5066 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5067 int i; 5068 5069 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5070 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5071 5072 /* disable the KCQs so the CPC stops touching memory that is about to become invalid */ 5073 for (i = 0; i < adev->gfx.num_compute_rings; i++) 5074 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); 5075 5076 if (amdgpu_sriov_vf(adev)) { 5077 pr_debug("For SRIOV client, nothing to do here.\n"); 5078 return 0; 5079 } 5080 gfx_v8_0_cp_enable(adev, false); 5081 gfx_v8_0_rlc_stop(adev); 5082 5083 amdgpu_set_powergating_state(adev, 5084 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); 5085 5086 return 0; 5087 } 5088 5089 static
int gfx_v8_0_suspend(void *handle) 5090 { 5091 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5092 adev->gfx.in_suspend = true; 5093 return gfx_v8_0_hw_fini(adev); 5094 } 5095 5096 static int gfx_v8_0_resume(void *handle) 5097 { 5098 int r; 5099 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5100 5101 r = gfx_v8_0_hw_init(adev); 5102 adev->gfx.in_suspend = false; 5103 return r; 5104 } 5105 5106 static bool gfx_v8_0_is_idle(void *handle) 5107 { 5108 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5109 5110 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5111 return false; 5112 else 5113 return true; 5114 } 5115 5116 static int gfx_v8_0_wait_for_idle(void *handle) 5117 { 5118 unsigned i; 5119 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5120 5121 for (i = 0; i < adev->usec_timeout; i++) { 5122 if (gfx_v8_0_is_idle(handle)) 5123 return 0; 5124 5125 udelay(1); 5126 } 5127 return -ETIMEDOUT; 5128 } 5129 5130 static bool gfx_v8_0_check_soft_reset(void *handle) 5131 { 5132 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5133 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5134 u32 tmp; 5135 5136 /* GRBM_STATUS */ 5137 tmp = RREG32(mmGRBM_STATUS); 5138 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5139 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5140 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5141 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5142 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5143 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5144 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 5145 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5146 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 5147 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5148 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 5149 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5150 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5151 } 5152 5153 /* GRBM_STATUS2 */ 5154 tmp = RREG32(mmGRBM_STATUS2); 5155 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 5156 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5157 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 5158 5159 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 5160 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 5161 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 5162 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5163 SOFT_RESET_CPF, 1); 5164 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5165 SOFT_RESET_CPC, 1); 5166 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5167 SOFT_RESET_CPG, 1); 5168 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 5169 SOFT_RESET_GRBM, 1); 5170 } 5171 5172 /* SRBM_STATUS */ 5173 tmp = RREG32(mmSRBM_STATUS); 5174 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 5175 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5176 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5177 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 5178 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5179 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5180 5181 if (grbm_soft_reset || srbm_soft_reset) { 5182 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5183 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5184 return true; 5185 } else { 5186 adev->gfx.grbm_soft_reset = 0; 5187 adev->gfx.srbm_soft_reset = 0; 5188 return false; 5189 } 5190 } 5191 5192 static int gfx_v8_0_pre_soft_reset(void *handle) 5193 { 5194 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 
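/* note: everything below only quiesces the engines that gfx_v8_0_check_soft_reset() flagged; the reset bits themselves are toggled later, in gfx_v8_0_soft_reset() */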
5195 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5196 5197 if ((!adev->gfx.grbm_soft_reset) && 5198 (!adev->gfx.srbm_soft_reset)) 5199 return 0; 5200 5201 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5202 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5203 5204 /* stop the rlc */ 5205 gfx_v8_0_rlc_stop(adev); 5206 5207 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5208 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5209 /* Disable GFX parsing/prefetching */ 5210 gfx_v8_0_cp_gfx_enable(adev, false); 5211 5212 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5213 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5214 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5215 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5216 int i; 5217 5218 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5219 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5220 5221 mutex_lock(&adev->srbm_mutex); 5222 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5223 gfx_v8_0_deactivate_hqd(adev, 2); 5224 vi_srbm_select(adev, 0, 0, 0, 0); 5225 mutex_unlock(&adev->srbm_mutex); 5226 } 5227 /* Disable MEC parsing/prefetching */ 5228 gfx_v8_0_cp_compute_enable(adev, false); 5229 } 5230 5231 return 0; 5232 } 5233 5234 static int gfx_v8_0_soft_reset(void *handle) 5235 { 5236 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5237 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5238 u32 tmp; 5239 5240 if ((!adev->gfx.grbm_soft_reset) && 5241 (!adev->gfx.srbm_soft_reset)) 5242 return 0; 5243 5244 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5245 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5246 5247 if (grbm_soft_reset || srbm_soft_reset) { 5248 tmp = RREG32(mmGMCON_DEBUG); 5249 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5250 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5251 WREG32(mmGMCON_DEBUG, tmp); 5252 udelay(50); 5253 } 5254 5255 if (grbm_soft_reset) { 5256 tmp = RREG32(mmGRBM_SOFT_RESET); 5257 tmp |= grbm_soft_reset; 5258 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5259 WREG32(mmGRBM_SOFT_RESET, tmp); 5260 tmp = RREG32(mmGRBM_SOFT_RESET); 5261 5262 udelay(50); 5263 5264 tmp &= ~grbm_soft_reset; 5265 WREG32(mmGRBM_SOFT_RESET, tmp); 5266 tmp = RREG32(mmGRBM_SOFT_RESET); 5267 } 5268 5269 if (srbm_soft_reset) { 5270 tmp = RREG32(mmSRBM_SOFT_RESET); 5271 tmp |= srbm_soft_reset; 5272 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5273 WREG32(mmSRBM_SOFT_RESET, tmp); 5274 tmp = RREG32(mmSRBM_SOFT_RESET); 5275 5276 udelay(50); 5277 5278 tmp &= ~srbm_soft_reset; 5279 WREG32(mmSRBM_SOFT_RESET, tmp); 5280 tmp = RREG32(mmSRBM_SOFT_RESET); 5281 } 5282 5283 if (grbm_soft_reset || srbm_soft_reset) { 5284 tmp = RREG32(mmGMCON_DEBUG); 5285 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5286 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5287 WREG32(mmGMCON_DEBUG, tmp); 5288 } 5289 5290 /* Wait a little for things to settle down */ 5291 udelay(50); 5292 5293 return 0; 5294 } 5295 5296 static int gfx_v8_0_post_soft_reset(void *handle) 5297 { 5298 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5299 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5300 5301 if ((!adev->gfx.grbm_soft_reset) && 5302 (!adev->gfx.srbm_soft_reset)) 5303 return 0; 5304 5305 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5306 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5307 5308 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5309 
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5310 gfx_v8_0_cp_gfx_resume(adev); 5311 5312 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5313 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5314 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5315 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5316 int i; 5317 5318 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5319 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5320 5321 mutex_lock(&adev->srbm_mutex); 5322 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5323 gfx_v8_0_deactivate_hqd(adev, 2); 5324 vi_srbm_select(adev, 0, 0, 0, 0); 5325 mutex_unlock(&adev->srbm_mutex); 5326 } 5327 gfx_v8_0_kiq_resume(adev); 5328 } 5329 gfx_v8_0_rlc_start(adev); 5330 5331 return 0; 5332 } 5333 5334 /** 5335 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5336 * 5337 * @adev: amdgpu_device pointer 5338 * 5339 * Fetches a GPU clock counter snapshot. 5340 * Returns the 64 bit clock counter snapshot. 5341 */ 5342 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5343 { 5344 uint64_t clock; 5345 5346 mutex_lock(&adev->gfx.gpu_clock_mutex); 5347 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5348 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5349 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5350 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5351 return clock; 5352 } 5353 5354 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5355 uint32_t vmid, 5356 uint32_t gds_base, uint32_t gds_size, 5357 uint32_t gws_base, uint32_t gws_size, 5358 uint32_t oa_base, uint32_t oa_size) 5359 { 5360 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 5361 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 5362 5363 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 5364 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 5365 5366 oa_base = oa_base >> AMDGPU_OA_SHIFT; 5367 oa_size = oa_size >> AMDGPU_OA_SHIFT; 5368 5369 /* GDS Base */ 5370 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5371 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5372 WRITE_DATA_DST_SEL(0))); 5373 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5374 amdgpu_ring_write(ring, 0); 5375 amdgpu_ring_write(ring, gds_base); 5376 5377 /* GDS Size */ 5378 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5379 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5380 WRITE_DATA_DST_SEL(0))); 5381 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5382 amdgpu_ring_write(ring, 0); 5383 amdgpu_ring_write(ring, gds_size); 5384 5385 /* GWS */ 5386 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5387 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5388 WRITE_DATA_DST_SEL(0))); 5389 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5390 amdgpu_ring_write(ring, 0); 5391 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5392 5393 /* OA */ 5394 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5395 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5396 WRITE_DATA_DST_SEL(0))); 5397 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5398 amdgpu_ring_write(ring, 0); 5399 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5400 } 5401 5402 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5403 { 5404 WREG32(mmSQ_IND_INDEX, 5405 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5406 (simd << 
SQ_IND_INDEX__SIMD_ID__SHIFT) | 5407 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5408 (SQ_IND_INDEX__FORCE_READ_MASK)); 5409 return RREG32(mmSQ_IND_DATA); 5410 } 5411 5412 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5413 uint32_t wave, uint32_t thread, 5414 uint32_t regno, uint32_t num, uint32_t *out) 5415 { 5416 WREG32(mmSQ_IND_INDEX, 5417 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5418 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5419 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5420 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5421 (SQ_IND_INDEX__FORCE_READ_MASK) | 5422 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5423 while (num--) 5424 *(out++) = RREG32(mmSQ_IND_DATA); 5425 } 5426 5427 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5428 { 5429 /* type 0 wave data */ 5430 dst[(*no_fields)++] = 0; 5431 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5432 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5433 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 5434 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5435 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5436 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5437 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5438 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5439 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5440 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5441 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5442 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5443 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5444 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5445 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5446 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5447 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5448 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5449 } 5450 5451 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5452 uint32_t wave, uint32_t start, 5453 uint32_t size, uint32_t *dst) 5454 { 5455 wave_read_regs( 5456 adev, simd, wave, 0, 5457 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5458 } 5459 5460 5461 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5462 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5463 .select_se_sh = &gfx_v8_0_select_se_sh, 5464 .read_wave_data = &gfx_v8_0_read_wave_data, 5465 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5466 }; 5467 5468 static int gfx_v8_0_early_init(void *handle) 5469 { 5470 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5471 5472 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5473 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 5474 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5475 gfx_v8_0_set_ring_funcs(adev); 5476 gfx_v8_0_set_irq_funcs(adev); 5477 gfx_v8_0_set_gds_init(adev); 5478 gfx_v8_0_set_rlc_funcs(adev); 5479 5480 return 0; 5481 } 5482 5483 static int gfx_v8_0_late_init(void *handle) 5484 { 5485 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5486 int r; 5487 5488 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5489 if (r) 5490 return 
r; 5491 5492 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5493 if (r) 5494 return r; 5495 5496 /* requires IBs so do in late init after IB pool is initialized */ 5497 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5498 if (r) 5499 return r; 5500 5501 amdgpu_set_powergating_state(adev, 5502 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); 5503 5504 return 0; 5505 } 5506 5507 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5508 bool enable) 5509 { 5510 if ((adev->asic_type == CHIP_POLARIS11) || 5511 (adev->asic_type == CHIP_POLARIS12)) 5512 /* Send msg to SMU via Powerplay */ 5513 amdgpu_set_powergating_state(adev, 5514 AMD_IP_BLOCK_TYPE_SMC, 5515 enable ? 5516 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); 5517 5518 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5519 } 5520 5521 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5522 bool enable) 5523 { 5524 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0); 5525 } 5526 5527 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5528 bool enable) 5529 { 5530 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5531 } 5532 5533 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5534 bool enable) 5535 { 5536 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5537 } 5538 5539 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5540 bool enable) 5541 { 5542 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5543 5544 /* Read any GFX register to wake up GFX. */ 5545 if (!enable) 5546 RREG32(mmDB_RENDER_CONTROL); 5547 } 5548 5549 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5550 bool enable) 5551 { 5552 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5553 cz_enable_gfx_cg_power_gating(adev, true); 5554 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5555 cz_enable_gfx_pipeline_power_gating(adev, true); 5556 } else { 5557 cz_enable_gfx_cg_power_gating(adev, false); 5558 cz_enable_gfx_pipeline_power_gating(adev, false); 5559 } 5560 } 5561 5562 static int gfx_v8_0_set_powergating_state(void *handle, 5563 enum amd_powergating_state state) 5564 { 5565 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5566 bool enable = (state == AMD_PG_STATE_GATE); 5567 5568 if (amdgpu_sriov_vf(adev)) 5569 return 0; 5570 5571 switch (adev->asic_type) { 5572 case CHIP_CARRIZO: 5573 case CHIP_STONEY: 5574 5575 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5576 cz_enable_sck_slow_down_on_power_up(adev, true); 5577 cz_enable_sck_slow_down_on_power_down(adev, true); 5578 } else { 5579 cz_enable_sck_slow_down_on_power_up(adev, false); 5580 cz_enable_sck_slow_down_on_power_down(adev, false); 5581 } 5582 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5583 cz_enable_cp_power_gating(adev, true); 5584 else 5585 cz_enable_cp_power_gating(adev, false); 5586 5587 cz_update_gfx_cg_power_gating(adev, enable); 5588 5589 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5590 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5591 else 5592 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5593 5594 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5595 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5596 else 5597 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5598 break; 5599 case CHIP_POLARIS11: 5600 case CHIP_POLARIS12: 5601 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5602 
gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5603 else 5604 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5605 5606 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5607 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5608 else 5609 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5610 5611 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) 5612 polaris11_enable_gfx_quick_mg_power_gating(adev, true); 5613 else 5614 polaris11_enable_gfx_quick_mg_power_gating(adev, false); 5615 break; 5616 default: 5617 break; 5618 } 5619 5620 return 0; 5621 } 5622 5623 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) 5624 { 5625 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5626 int data; 5627 5628 if (amdgpu_sriov_vf(adev)) 5629 *flags = 0; 5630 5631 /* AMD_CG_SUPPORT_GFX_MGCG */ 5632 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5633 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) 5634 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5635 5636 /* AMD_CG_SUPPORT_GFX_CGCG */ 5637 data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5638 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5639 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5640 5641 /* AMD_CG_SUPPORT_GFX_CGLS */ 5642 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5643 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5644 5645 /* AMD_CG_SUPPORT_GFX_CGTS */ 5646 data = RREG32(mmCGTS_SM_CTRL_REG); 5647 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK)) 5648 *flags |= AMD_CG_SUPPORT_GFX_CGTS; 5649 5650 /* AMD_CG_SUPPORT_GFX_CGTS_LS */ 5651 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK)) 5652 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS; 5653 5654 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5655 data = RREG32(mmRLC_MEM_SLP_CNTL); 5656 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5657 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5658 5659 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5660 data = RREG32(mmCP_MEM_SLP_CNTL); 5661 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5662 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5663 } 5664 5665 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5666 uint32_t reg_addr, uint32_t cmd) 5667 { 5668 uint32_t data; 5669 5670 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5671 5672 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5673 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5674 5675 data = RREG32(mmRLC_SERDES_WR_CTRL); 5676 if (adev->asic_type == CHIP_STONEY) 5677 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5678 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5679 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5680 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5681 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5682 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5683 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5684 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5685 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5686 else 5687 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5688 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5689 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5690 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5691 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5692 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5693 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5694 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5695 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5696 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5697 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5698 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5699 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5700 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5701 (0xff <<
RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5702 5703 WREG32(mmRLC_SERDES_WR_CTRL, data); 5704 } 5705 5706 #define MSG_ENTER_RLC_SAFE_MODE 1 5707 #define MSG_EXIT_RLC_SAFE_MODE 0 5708 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5709 #define RLC_GPR_REG2__REQ__SHIFT 0 5710 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5711 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5712 5713 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5714 { 5715 u32 data; 5716 unsigned i; 5717 5718 data = RREG32(mmRLC_CNTL); 5719 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5720 return; 5721 5722 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5723 data |= RLC_SAFE_MODE__CMD_MASK; 5724 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5725 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5726 WREG32(mmRLC_SAFE_MODE, data); 5727 5728 for (i = 0; i < adev->usec_timeout; i++) { 5729 if ((RREG32(mmRLC_GPM_STAT) & 5730 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5731 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5732 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5733 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5734 break; 5735 udelay(1); 5736 } 5737 5738 for (i = 0; i < adev->usec_timeout; i++) { 5739 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5740 break; 5741 udelay(1); 5742 } 5743 adev->gfx.rlc.in_safe_mode = true; 5744 } 5745 } 5746 5747 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5748 { 5749 u32 data = 0; 5750 unsigned i; 5751 5752 data = RREG32(mmRLC_CNTL); 5753 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5754 return; 5755 5756 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5757 if (adev->gfx.rlc.in_safe_mode) { 5758 data |= RLC_SAFE_MODE__CMD_MASK; 5759 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5760 WREG32(mmRLC_SAFE_MODE, data); 5761 adev->gfx.rlc.in_safe_mode = false; 5762 } 5763 } 5764 5765 for (i = 0; i < adev->usec_timeout; i++) { 5766 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5767 break; 5768 udelay(1); 5769 } 5770 } 5771 5772 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5773 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5774 .exit_safe_mode = iceland_exit_rlc_safe_mode 5775 }; 5776 5777 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5778 bool enable) 5779 { 5780 uint32_t temp, data; 5781 5782 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5783 5784 /* It is disabled by HW by default */ 5785 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5786 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5787 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 5788 /* 1 - RLC memory Light sleep */ 5789 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 5790 5791 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 5792 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 5793 } 5794 5795 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5796 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5797 if (adev->flags & AMD_IS_APU) 5798 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5799 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5800 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5801 else 5802 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5803 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5804 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5805 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5806 5807 if (temp != data) 5808 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5809 5810 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5811 gfx_v8_0_wait_for_rlc_serdes(adev); 5812 5813 /* 5 - clear mgcg override */ 5814 gfx_v8_0_send_serdes_cmd(adev, 
BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5815 5816 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5817 /* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */ 5818 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5819 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5820 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5821 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 5822 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 5823 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 5824 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 5825 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 5826 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 5827 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 5828 if (temp != data) 5829 WREG32(mmCGTS_SM_CTRL_REG, data); 5830 } 5831 udelay(50); 5832 5833 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5834 gfx_v8_0_wait_for_rlc_serdes(adev); 5835 } else { 5836 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 5837 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5838 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5839 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5840 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5841 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5842 if (temp != data) 5843 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5844 5845 /* 2 - disable MGLS in RLC */ 5846 data = RREG32(mmRLC_MEM_SLP_CNTL); 5847 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5848 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5849 WREG32(mmRLC_MEM_SLP_CNTL, data); 5850 } 5851 5852 /* 3 - disable MGLS in CP */ 5853 data = RREG32(mmCP_MEM_SLP_CNTL); 5854 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5855 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5856 WREG32(mmCP_MEM_SLP_CNTL, data); 5857 } 5858 5859 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 5860 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5861 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 5862 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 5863 if (temp != data) 5864 WREG32(mmCGTS_SM_CTRL_REG, data); 5865 5866 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5867 gfx_v8_0_wait_for_rlc_serdes(adev); 5868 5869 /* 6 - set mgcg override */ 5870 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5871 5872 udelay(50); 5873 5874 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5875 gfx_v8_0_wait_for_rlc_serdes(adev); 5876 } 5877 5878 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5879 } 5880 5881 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5882 bool enable) 5883 { 5884 uint32_t temp, temp1, data, data1; 5885 5886 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5887 5888 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5889 5890 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5891 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5892 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 5893 if (temp1 != data1) 5894 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5895 5896 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5897 gfx_v8_0_wait_for_rlc_serdes(adev); 5898 5899 /* 2 - clear cgcg override */ 5900 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5901 5902 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5903 gfx_v8_0_wait_for_rlc_serdes(adev); 5904 5905 /* 3 - write cmd to set CGLS */ 5906 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 5907 5908 /* 4 - enable cgcg */ 5909 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5910 5911 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
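/* CGLS is a refinement of CGCG, so it is only switched on while CGCG itself is being enabled; the ungate path below clears CGCG_EN and CGLS_EN together */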
5912 /* enable cgls */ 5913 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5914 5915 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5916 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 5917 5918 if (temp1 != data1) 5919 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5920 } else { 5921 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5922 } 5923 5924 if (temp != data) 5925 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5926 5927 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/ 5928 * Cmp_busy/GFX_Idle interrupts 5929 */ 5930 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5931 } else { 5932 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 5933 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5934 5935 /* TEST CGCG */ 5936 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5937 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 5938 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 5939 if (temp1 != data1) 5940 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5941 5942 /* read gfx register to wake up cgcg */ 5943 RREG32(mmCB_CGTT_SCLK_CTRL); 5944 RREG32(mmCB_CGTT_SCLK_CTRL); 5945 RREG32(mmCB_CGTT_SCLK_CTRL); 5946 RREG32(mmCB_CGTT_SCLK_CTRL); 5947 5948 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5949 gfx_v8_0_wait_for_rlc_serdes(adev); 5950 5951 /* write cmd to set CGCG override */ 5952 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5953 5954 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5955 gfx_v8_0_wait_for_rlc_serdes(adev); 5956 5957 /* write cmd to clear CGLS */ 5958 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 5959 5960 /* disable cgcg, cgls should be disabled too. */ 5961 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 5962 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5963 if (temp != data) 5964 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5965 /* enable interrupts again for PG */ 5966 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5967 } 5968 5969 gfx_v8_0_wait_for_rlc_serdes(adev); 5970 5971 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5972 } 5973 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5974 bool enable) 5975 { 5976 if (enable) { 5977 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 5978 * === MGCG + MGLS + TS(CG/LS) === 5979 */ 5980 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5981 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5982 } else { 5983 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 5984 * === CGCG + CGLS === 5985 */ 5986 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5987 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5988 } 5989 return 0; 5990 } 5991 5992 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 5993 enum amd_clockgating_state state) 5994 { 5995 uint32_t msg_id, pp_state = 0; 5996 uint32_t pp_support_state = 0; 5997 5998 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 5999 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6000 pp_support_state = PP_STATE_SUPPORT_LS; 6001 pp_state = PP_STATE_LS; 6002 } 6003 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6004 pp_support_state |= PP_STATE_SUPPORT_CG; 6005 pp_state |= PP_STATE_CG; 6006 } 6007 if (state == AMD_CG_STATE_UNGATE) 6008 pp_state = 0; 6009 6010 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6011 PP_BLOCK_GFX_CG, 6012 pp_support_state, 6013 pp_state); 6014 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6015 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6016 } 6017 6018 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS)) { 6019 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6020 pp_support_state = PP_STATE_SUPPORT_LS; 6021 pp_state = PP_STATE_LS; 6022 } 6023 6024 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6025 pp_support_state |= PP_STATE_SUPPORT_CG; 6026 pp_state |= PP_STATE_CG; 6027 } 6028 6029 if (state == AMD_CG_STATE_UNGATE) 6030 pp_state = 0; 6031 6032 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6033 PP_BLOCK_GFX_MG, 6034 pp_support_state, 6035 pp_state); 6036 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6037 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6038 } 6039 6040 return 0; 6041 } 6042 6043 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 6044 enum amd_clockgating_state state) 6045 { 6046 6047 uint32_t msg_id, pp_state = 0; 6048 uint32_t pp_support_state = 0; 6049 6050 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6051 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6052 pp_support_state = PP_STATE_SUPPORT_LS; 6053 pp_state = PP_STATE_LS; 6054 } 6055 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6056 pp_support_state |= PP_STATE_SUPPORT_CG; 6057 pp_state |= PP_STATE_CG; 6058 } 6059 if (state == AMD_CG_STATE_UNGATE) 6060 pp_state = 0; 6061 6062 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6063 PP_BLOCK_GFX_CG, 6064 pp_support_state, 6065 pp_state); 6066 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6067 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6068 } 6069 6070 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 6071 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 6072 pp_support_state = PP_STATE_SUPPORT_LS; 6073 pp_state = PP_STATE_LS; 6074 } 6075 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 6076 pp_support_state |= PP_STATE_SUPPORT_CG; 6077 pp_state |= PP_STATE_CG; 6078 } 6079 if (state == AMD_CG_STATE_UNGATE) 6080 pp_state = 0; 6081 6082 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6083 PP_BLOCK_GFX_3D, 6084 pp_support_state, 6085 pp_state); 6086 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6087 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6088 } 6089 6090 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6091 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6092 pp_support_state = PP_STATE_SUPPORT_LS; 6093 pp_state = PP_STATE_LS; 6094 } 6095 6096 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6097 pp_support_state |= PP_STATE_SUPPORT_CG; 6098 pp_state |= PP_STATE_CG; 6099 } 6100 6101 if (state == AMD_CG_STATE_UNGATE) 6102 pp_state = 0; 6103 6104 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6105 PP_BLOCK_GFX_MG, 6106 pp_support_state, 6107 pp_state); 6108 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6109 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6110 } 6111 6112 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 6113 pp_support_state = PP_STATE_SUPPORT_LS; 6114 6115 if (state == AMD_CG_STATE_UNGATE) 6116 pp_state = 0; 6117 else 6118 pp_state = PP_STATE_LS; 6119 6120 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6121 PP_BLOCK_GFX_RLC, 6122 pp_support_state, 6123 pp_state); 6124 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6125 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6126 } 6127 6128 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 6129 pp_support_state = PP_STATE_SUPPORT_LS; 6130 6131 if (state == AMD_CG_STATE_UNGATE) 6132 pp_state = 0; 6133 else 6134 pp_state = PP_STATE_LS; 6135 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6136 PP_BLOCK_GFX_CP, 6137 pp_support_state, 6138 pp_state); 6139 if 
(adev->powerplay.pp_funcs->set_clockgating_by_smu) 6140 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6141 } 6142 6143 return 0; 6144 } 6145 6146 static int gfx_v8_0_set_clockgating_state(void *handle, 6147 enum amd_clockgating_state state) 6148 { 6149 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6150 6151 if (amdgpu_sriov_vf(adev)) 6152 return 0; 6153 6154 switch (adev->asic_type) { 6155 case CHIP_FIJI: 6156 case CHIP_CARRIZO: 6157 case CHIP_STONEY: 6158 gfx_v8_0_update_gfx_clock_gating(adev, 6159 state == AMD_CG_STATE_GATE); 6160 break; 6161 case CHIP_TONGA: 6162 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6163 break; 6164 case CHIP_POLARIS10: 6165 case CHIP_POLARIS11: 6166 case CHIP_POLARIS12: 6167 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6168 break; 6169 default: 6170 break; 6171 } 6172 return 0; 6173 } 6174 6175 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6176 { 6177 return ring->adev->wb.wb[ring->rptr_offs]; 6178 } 6179 6180 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6181 { 6182 struct amdgpu_device *adev = ring->adev; 6183 6184 if (ring->use_doorbell) 6185 /* XXX check if swapping is necessary on BE */ 6186 return ring->adev->wb.wb[ring->wptr_offs]; 6187 else 6188 return RREG32(mmCP_RB0_WPTR); 6189 } 6190 6191 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 6192 { 6193 struct amdgpu_device *adev = ring->adev; 6194 6195 if (ring->use_doorbell) { 6196 /* XXX check if swapping is necessary on BE */ 6197 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6198 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6199 } else { 6200 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 6201 (void)RREG32(mmCP_RB0_WPTR); 6202 } 6203 } 6204 6205 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 6206 { 6207 u32 ref_and_mask, reg_mem_engine; 6208 6209 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || 6210 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { 6211 switch (ring->me) { 6212 case 1: 6213 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6214 break; 6215 case 2: 6216 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6217 break; 6218 default: 6219 return; 6220 } 6221 reg_mem_engine = 0; 6222 } else { 6223 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6224 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6225 } 6226 6227 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6228 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6229 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6230 reg_mem_engine)); 6231 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 6232 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6233 amdgpu_ring_write(ring, ref_and_mask); 6234 amdgpu_ring_write(ring, ref_and_mask); 6235 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6236 } 6237 6238 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) 6239 { 6240 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6241 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) | 6242 EVENT_INDEX(4)); 6243 6244 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6245 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) | 6246 EVENT_INDEX(0)); 6247 } 6248 6249 6250 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) 6251 { 6252 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6253 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6254 WRITE_DATA_DST_SEL(0) | 6255 WR_CONFIRM)); 6256 amdgpu_ring_write(ring, mmHDP_DEBUG0); 
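/* the remaining WRITE_DATA dwords are the destination address high word (unused for a register destination) and the payload; writing 1 to HDP_DEBUG0 is the VI-era mechanism for invalidating the HDP cache */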
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}

static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}

static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);
}

static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
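
/*
 * Fence emission for the gfx ring: one EVENT_WRITE_EOP packet flushes
 * the TC/TCL1 caches and then writes the sequence number to "addr".
 * DATA_SEL selects a 32- or 64-bit write and INT_SEL optionally raises
 * an interrupt once the write completes.
 */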
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
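
/*
 * Compute-pipe throttling used by the queue-priority code below: a pipe
 * holding a reservation keeps SPI_WCL_PIPE_PERCENT_GFX at the full VALUE
 * mask, while every unreserved pipe is cut down to 0x1.  The
 * "pipe_num -= 2" below accounts for the first ME exposing only the GFX
 * and HP3D entries in this register block.
 */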
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}

static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
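
/*
 * Raise or restore the HQD priority of one compute queue.  The SRBM
 * select around the writes is needed because the CP_HQD_* registers are
 * banked per me/pipe/queue.
 */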
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
					       enum amd_sched_priority priority)
{
	struct amdgpu_device *adev = ring->adev;
	bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW;

	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return;

	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
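
/*
 * Emit a CONTEXT_CONTROL packet.  Under SR-IOV the CE metadata is written
 * first, presumably so CE RAM can be restored after preemption; the dw2
 * load bits themselves are documented inline below.
 */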
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time a preamble is
		 * presented, although no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
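
/*
 * Conditional execution support: init_cond_exec emits a COND_EXEC packet
 * whose DW-count operand is the 0x55aa55aa placeholder and returns its
 * ring offset; patch_cond_exec later rewrites that placeholder with the
 * real number of DWs to skip, handling ring wrap-around.
 */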
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |		/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
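
/*
 * EOP interrupt handler.  The IH ring_id encodes the source queue as
 * queue[6:4], me[3:2], pipe[1:0]; e.g. ring_id 0x15 decodes to me 1,
 * pipe 1, queue 1.  me 0 is the gfx ring, me 1/2 are matched against
 * the compute rings.
 */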
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting
			 * from VI.  The interrupt can only be enabled/disabled
			 * per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) &&
			    (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
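
/*
 * Per-ring-type function tables.  The emit_frame_size sums are the
 * worst-case DW counts the driver may emit around a single submission
 * and must stay in sync with the emit_* helpers above, since they feed
 * the ring-space reservation.
 */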
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
			the first COND_EXEC jump to the place just
			prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
};
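
/*
 * The KIQ ring is driver-internal: it reuses the compute emit helpers
 * but adds emit_rreg/emit_wreg, which let the virtualization code access
 * registers through the CP rather than via MMIO.
 */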
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
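
/*
 * CU bookkeeping.  The active-CU bitmap below is the OR of the fuse (CC)
 * and user (GC_USER) INACTIVE_CUS fields, inverted and masked down to
 * max_cu_per_sh bits: with 8 CUs per SH and INACTIVE_CUS == 0x3 the
 * active bitmap comes out as 0xfc.
 */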
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
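
/*
 * DE metadata mirrors gfx_v8_0_ring_emit_ce_meta() above but is written
 * through the ME (WRITE_DATA_ENGINE_SEL(1)) and additionally records a
 * GDS backup address one page past the CSA, presumably so GDS contents
 * can be restored when a preempted IB resumes.
 */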
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr +
			offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr +
			offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}