/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)		((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)		((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)		((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)		((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)		((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)		((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)		((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM register address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14
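/*
 * Firmware images requested by gfx_v8_0_init_microcode() below: one set of
 * CP binaries (pfp/me/ce/mec, plus mec2 where present) and an RLC binary
 * per supported VI ASIC.
 */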
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version 46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
		le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
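		/* Front-door loading through the SMU: record each ucode in the
		 * adev->firmware table so the common code can reserve space
		 * for it and hand it to the SMU loader.
		 */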
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the CP jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}
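/*
 * Build the clear-state indirect buffer from the golden context register
 * extents: a clear-state preamble, one SET_CONTEXT_REG run per extent, the
 * per-board raster config, the preamble end and a final CLEAR_STATE packet.
 */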
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
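/*
 * Tear down the RLC objects: the clear-state BO and, on the APUs that have
 * one, the CP jump-table BO created by gfx_v8_0_rlc_init().
 */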
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}

static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     mec_hpd_size,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
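/* Raw GCN3 machine code, like the VGPR shader above: what appears to be a
 * straight-line run of scalar moves that touches the SGPR file, terminated
 * by s_barrier (0xbf8a0000) and s_endpgm (0xbf810000).
 */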
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
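/*
 * Carrizo-only EDC workaround: with GB_EDC_MODE temporarily cleared,
 * dispatch the init shaders above so that every VGPR and SGPR holds a
 * known value, leaving the GPR ECC/EDC state clean.
 */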
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords for each SET_SH_REG reg/value pair, 4 for
	 * the PGM_LO/HI write, 5 for DISPATCH_DIRECT, 2 for the EVENT_WRITE,
	 * times 4 bytes per dword
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}

static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
1773 adev->gfx.config.max_tile_pipes = 16; 1774 adev->gfx.config.max_cu_per_sh = 16; 1775 adev->gfx.config.max_sh_per_se = 1; 1776 adev->gfx.config.max_backends_per_se = 4; 1777 adev->gfx.config.max_texture_channel_caches = 16; 1778 adev->gfx.config.max_gprs = 256; 1779 adev->gfx.config.max_gs_threads = 32; 1780 adev->gfx.config.max_hw_contexts = 8; 1781 1782 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1783 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1784 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1785 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1786 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1787 break; 1788 case CHIP_POLARIS11: 1789 case CHIP_POLARIS12: 1790 ret = amdgpu_atombios_get_gfx_info(adev); 1791 if (ret) 1792 return ret; 1793 adev->gfx.config.max_gprs = 256; 1794 adev->gfx.config.max_gs_threads = 32; 1795 adev->gfx.config.max_hw_contexts = 8; 1796 1797 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1798 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1799 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1800 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1801 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; 1802 break; 1803 case CHIP_POLARIS10: 1804 ret = amdgpu_atombios_get_gfx_info(adev); 1805 if (ret) 1806 return ret; 1807 adev->gfx.config.max_gprs = 256; 1808 adev->gfx.config.max_gs_threads = 32; 1809 adev->gfx.config.max_hw_contexts = 8; 1810 1811 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1812 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1813 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1814 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1815 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1816 break; 1817 case CHIP_TONGA: 1818 adev->gfx.config.max_shader_engines = 4; 1819 adev->gfx.config.max_tile_pipes = 8; 1820 adev->gfx.config.max_cu_per_sh = 8; 1821 adev->gfx.config.max_sh_per_se = 1; 1822 adev->gfx.config.max_backends_per_se = 2; 1823 adev->gfx.config.max_texture_channel_caches = 8; 1824 adev->gfx.config.max_gprs = 256; 1825 adev->gfx.config.max_gs_threads = 32; 1826 adev->gfx.config.max_hw_contexts = 8; 1827 1828 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1829 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1830 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1831 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1832 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1833 break; 1834 case CHIP_CARRIZO: 1835 adev->gfx.config.max_shader_engines = 1; 1836 adev->gfx.config.max_tile_pipes = 2; 1837 adev->gfx.config.max_sh_per_se = 1; 1838 adev->gfx.config.max_backends_per_se = 2; 1839 adev->gfx.config.max_cu_per_sh = 8; 1840 adev->gfx.config.max_texture_channel_caches = 2; 1841 adev->gfx.config.max_gprs = 256; 1842 adev->gfx.config.max_gs_threads = 32; 1843 adev->gfx.config.max_hw_contexts = 8; 1844 1845 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1846 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1847 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1848 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1849 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1850 break; 1851 case CHIP_STONEY: 1852 adev->gfx.config.max_shader_engines = 1; 1853 adev->gfx.config.max_tile_pipes = 2; 1854 adev->gfx.config.max_sh_per_se = 1; 1855 adev->gfx.config.max_backends_per_se = 1; 1856 adev->gfx.config.max_cu_per_sh = 3; 1857 adev->gfx.config.max_texture_channel_caches = 2; 1858 adev->gfx.config.max_gprs = 256; 1859 adev->gfx.config.max_gs_threads = 16; 1860 adev->gfx.config.max_hw_contexts = 8; 1861 1862 
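		/* Note: the scan-converter FIFO sizes set below are identical
		 * for every VI variant configured in this switch. */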
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
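		/* An address-map value of 11 appears to be the 8GB DIMM
		 * encoding in MC_FUS_DRAM*_BANK_ADDR_MAPPING, which is why it
		 * is the only value that selects the 2KB row size below.
		 */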
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		/* row size in KB: (4 * 2^(8 + NOOFCOLS)) / 1024, capped at 4KB */
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}

static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				       int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			+ (ring_id * GFX8_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}

static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
2037 DRM_ERROR("Failed to load gfx firmware!\n"); 2038 return r; 2039 } 2040 2041 r = gfx_v8_0_rlc_init(adev); 2042 if (r) { 2043 DRM_ERROR("Failed to init rlc BOs!\n"); 2044 return r; 2045 } 2046 2047 r = gfx_v8_0_mec_init(adev); 2048 if (r) { 2049 DRM_ERROR("Failed to init MEC BOs!\n"); 2050 return r; 2051 } 2052 2053 /* set up the gfx ring */ 2054 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2055 ring = &adev->gfx.gfx_ring[i]; 2056 ring->ring_obj = NULL; 2057 sprintf(ring->name, "gfx"); 2058 /* no gfx doorbells on iceland */ 2059 if (adev->asic_type != CHIP_TOPAZ) { 2060 ring->use_doorbell = true; 2061 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; 2062 } 2063 2064 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2065 AMDGPU_CP_IRQ_GFX_EOP); 2066 if (r) 2067 return r; 2068 } 2069 2070 2071 /* set up the compute queues - allocate horizontally across pipes */ 2072 ring_id = 0; 2073 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2074 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2075 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2076 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2077 continue; 2078 2079 r = gfx_v8_0_compute_ring_init(adev, 2080 ring_id, 2081 i, k, j); 2082 if (r) 2083 return r; 2084 2085 ring_id++; 2086 } 2087 } 2088 } 2089 2090 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE); 2091 if (r) { 2092 DRM_ERROR("Failed to init KIQ BOs!\n"); 2093 return r; 2094 } 2095 2096 kiq = &adev->gfx.kiq; 2097 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2098 if (r) 2099 return r; 2100 2101 /* create MQD for all compute queues as well as KIQ for SRIOV case */ 2102 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); 2103 if (r) 2104 return r; 2105 2106 /* reserve GDS, GWS and OA resource for gfx */ 2107 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, 2108 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, 2109 &adev->gds.gds_gfx_bo, NULL, NULL); 2110 if (r) 2111 return r; 2112 2113 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, 2114 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, 2115 &adev->gds.gws_gfx_bo, NULL, NULL); 2116 if (r) 2117 return r; 2118 2119 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, 2120 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, 2121 &adev->gds.oa_gfx_bo, NULL, NULL); 2122 if (r) 2123 return r; 2124 2125 adev->gfx.ce_ram_size = 0x8000; 2126 2127 r = gfx_v8_0_gpu_early_init(adev); 2128 if (r) 2129 return r; 2130 2131 return 0; 2132 } 2133 2134 static int gfx_v8_0_sw_fini(void *handle) 2135 { 2136 int i; 2137 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2138 2139 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); 2140 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); 2141 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); 2142 2143 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2144 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2145 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2146 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2147 2148 amdgpu_gfx_compute_mqd_sw_fini(adev); 2149 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 2150 amdgpu_gfx_kiq_fini(adev); 2151 2152 gfx_v8_0_mec_fini(adev); 2153 gfx_v8_0_rlc_fini(adev); 2154 gfx_v8_0_free_microcode(adev); 2155 2156 return 0; 2157 } 2158 2159 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) 2160 { 2161 uint32_t *modearray, *mod2array; 2162 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); 2163 const u32 
num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 2164 u32 reg_offset; 2165 2166 modearray = adev->gfx.config.tile_mode_array; 2167 mod2array = adev->gfx.config.macrotile_mode_array; 2168 2169 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2170 modearray[reg_offset] = 0; 2171 2172 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2173 mod2array[reg_offset] = 0; 2174 2175 switch (adev->asic_type) { 2176 case CHIP_TOPAZ: 2177 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2178 PIPE_CONFIG(ADDR_SURF_P2) | 2179 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2180 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2181 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2182 PIPE_CONFIG(ADDR_SURF_P2) | 2183 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2184 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2185 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2186 PIPE_CONFIG(ADDR_SURF_P2) | 2187 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2188 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2189 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2190 PIPE_CONFIG(ADDR_SURF_P2) | 2191 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2192 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2193 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2194 PIPE_CONFIG(ADDR_SURF_P2) | 2195 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2196 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2197 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2198 PIPE_CONFIG(ADDR_SURF_P2) | 2199 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2200 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2201 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2202 PIPE_CONFIG(ADDR_SURF_P2) | 2203 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2204 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2205 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2206 PIPE_CONFIG(ADDR_SURF_P2)); 2207 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2208 PIPE_CONFIG(ADDR_SURF_P2) | 2209 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2210 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2211 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2212 PIPE_CONFIG(ADDR_SURF_P2) | 2213 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2215 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2216 PIPE_CONFIG(ADDR_SURF_P2) | 2217 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2219 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2220 PIPE_CONFIG(ADDR_SURF_P2) | 2221 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2222 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2223 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2224 PIPE_CONFIG(ADDR_SURF_P2) | 2225 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2226 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2227 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2228 PIPE_CONFIG(ADDR_SURF_P2) | 2229 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2231 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2232 PIPE_CONFIG(ADDR_SURF_P2) | 2233 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2234 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2235 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2236 PIPE_CONFIG(ADDR_SURF_P2) | 2237 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2238 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2239 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2240 
PIPE_CONFIG(ADDR_SURF_P2) | 2241 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2243 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2244 PIPE_CONFIG(ADDR_SURF_P2) | 2245 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2247 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2248 PIPE_CONFIG(ADDR_SURF_P2) | 2249 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2251 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2252 PIPE_CONFIG(ADDR_SURF_P2) | 2253 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2255 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2256 PIPE_CONFIG(ADDR_SURF_P2) | 2257 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2259 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2260 PIPE_CONFIG(ADDR_SURF_P2) | 2261 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2263 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2264 PIPE_CONFIG(ADDR_SURF_P2) | 2265 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2267 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2268 PIPE_CONFIG(ADDR_SURF_P2) | 2269 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2271 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2272 PIPE_CONFIG(ADDR_SURF_P2) | 2273 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2274 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2275 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2276 PIPE_CONFIG(ADDR_SURF_P2) | 2277 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2279 2280 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2281 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2282 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2283 NUM_BANKS(ADDR_SURF_8_BANK)); 2284 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2285 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2286 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2287 NUM_BANKS(ADDR_SURF_8_BANK)); 2288 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2289 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2290 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2291 NUM_BANKS(ADDR_SURF_8_BANK)); 2292 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2293 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2294 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2295 NUM_BANKS(ADDR_SURF_8_BANK)); 2296 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2297 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2298 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2299 NUM_BANKS(ADDR_SURF_8_BANK)); 2300 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2301 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2302 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2303 NUM_BANKS(ADDR_SURF_8_BANK)); 2304 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2305 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2306 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2307 NUM_BANKS(ADDR_SURF_8_BANK)); 2308 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2309 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2310 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2311 NUM_BANKS(ADDR_SURF_16_BANK)); 2312 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2313 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2314 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2315 
NUM_BANKS(ADDR_SURF_16_BANK)); 2316 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2317 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2318 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2319 NUM_BANKS(ADDR_SURF_16_BANK)); 2320 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2321 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2322 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2323 NUM_BANKS(ADDR_SURF_16_BANK)); 2324 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2325 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2326 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2327 NUM_BANKS(ADDR_SURF_16_BANK)); 2328 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2329 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2330 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2331 NUM_BANKS(ADDR_SURF_16_BANK)); 2332 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2333 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2334 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2335 NUM_BANKS(ADDR_SURF_8_BANK)); 2336 2337 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2338 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2339 reg_offset != 23) 2340 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2341 2342 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2343 if (reg_offset != 7) 2344 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2345 2346 break; 2347 case CHIP_FIJI: 2348 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2349 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2350 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2351 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2352 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2353 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2354 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2355 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2356 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2357 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2358 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2359 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2360 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2361 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2362 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2363 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2364 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2365 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2366 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2367 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2368 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2369 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2370 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2371 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2372 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2373 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2374 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2375 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2376 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2377 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2378 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2379 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2380 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2381 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2382 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2383 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2384 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2385 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2386 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2387 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2388 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2389 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2390 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2391 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2392 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2393 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2394 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2395 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2396 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2397 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2398 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2399 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2400 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2401 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2402 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2403 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2404 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2405 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2406 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2407 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2408 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2409 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2410 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2411 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2412 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2413 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2414 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2415 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2416 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2417 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2418 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2419 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2420 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2421 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2422 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2423 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2424 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2425 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2426 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2427 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2428 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2429 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2430 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2431 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2432 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2433 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2434 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2435 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2436 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2438 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2439 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2440 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2442 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2443 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2444 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2446 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2447 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2448 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2449 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2450 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2451 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2452 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2453 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2454 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2455 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2456 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2457 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2458 modearray[28] = 
(ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2459 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2460 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2461 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2462 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2463 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2464 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2465 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2466 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2467 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2468 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2470 2471 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2472 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2473 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2474 NUM_BANKS(ADDR_SURF_8_BANK)); 2475 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2478 NUM_BANKS(ADDR_SURF_8_BANK)); 2479 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2480 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2481 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2482 NUM_BANKS(ADDR_SURF_8_BANK)); 2483 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2484 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2485 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2486 NUM_BANKS(ADDR_SURF_8_BANK)); 2487 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2488 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2489 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2490 NUM_BANKS(ADDR_SURF_8_BANK)); 2491 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2492 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2493 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2494 NUM_BANKS(ADDR_SURF_8_BANK)); 2495 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2496 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2497 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2498 NUM_BANKS(ADDR_SURF_8_BANK)); 2499 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2500 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2501 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2502 NUM_BANKS(ADDR_SURF_8_BANK)); 2503 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2504 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2505 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2506 NUM_BANKS(ADDR_SURF_8_BANK)); 2507 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2508 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2509 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2510 NUM_BANKS(ADDR_SURF_8_BANK)); 2511 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2514 NUM_BANKS(ADDR_SURF_8_BANK)); 2515 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2516 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2517 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2518 NUM_BANKS(ADDR_SURF_8_BANK)); 2519 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2520 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2521 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2522 NUM_BANKS(ADDR_SURF_8_BANK)); 2523 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2526 NUM_BANKS(ADDR_SURF_4_BANK)); 2527 2528 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2529 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2530 2531 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2532 if (reg_offset != 7) 2533 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, 
mod2array[reg_offset]); 2534 2535 break; 2536 case CHIP_TONGA: 2537 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2538 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2539 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2540 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2541 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2542 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2543 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2544 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2545 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2546 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2547 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2548 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2549 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2550 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2551 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2552 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2553 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2554 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2555 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2556 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2557 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2558 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2559 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2560 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2561 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2562 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2563 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2564 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2565 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2566 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2567 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2568 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2569 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2570 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2571 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2572 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2573 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2574 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2575 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2576 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2577 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2578 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2579 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2580 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2581 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2582 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2583 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2584 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2585 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2586 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2587 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2588 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2589 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2590 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2591 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2592 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2593 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2594 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2595 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2596 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2597 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2598 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2599 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2600 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2601 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2602 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2603 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2604 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2605 
MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2606 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2607 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2608 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2609 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2610 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2611 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2612 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2613 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2614 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2615 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2616 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2617 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2618 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2619 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2620 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2621 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2622 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2623 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2624 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2625 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2626 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2627 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2628 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2629 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2630 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2631 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2632 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2633 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2634 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2635 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2636 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2637 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2638 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2639 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2640 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2641 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2642 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2643 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2644 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2645 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2646 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2647 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2648 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2649 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2650 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2651 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2652 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2653 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2654 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2655 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2656 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2657 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2658 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2659 2660 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2661 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2662 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2663 NUM_BANKS(ADDR_SURF_16_BANK)); 2664 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2665 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2666 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2667 NUM_BANKS(ADDR_SURF_16_BANK)); 2668 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2669 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2670 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2671 NUM_BANKS(ADDR_SURF_16_BANK)); 2672 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2673 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2674 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2675 NUM_BANKS(ADDR_SURF_16_BANK)); 2676 mod2array[4] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2677 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2678 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2679 NUM_BANKS(ADDR_SURF_16_BANK)); 2680 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2681 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2682 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2683 NUM_BANKS(ADDR_SURF_16_BANK)); 2684 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2685 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2686 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2687 NUM_BANKS(ADDR_SURF_16_BANK)); 2688 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2689 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2690 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2691 NUM_BANKS(ADDR_SURF_16_BANK)); 2692 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2693 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2694 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2695 NUM_BANKS(ADDR_SURF_16_BANK)); 2696 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2697 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2698 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2699 NUM_BANKS(ADDR_SURF_16_BANK)); 2700 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2701 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2702 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2703 NUM_BANKS(ADDR_SURF_16_BANK)); 2704 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2705 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2706 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2707 NUM_BANKS(ADDR_SURF_8_BANK)); 2708 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2709 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2710 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2711 NUM_BANKS(ADDR_SURF_4_BANK)); 2712 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2713 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2714 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2715 NUM_BANKS(ADDR_SURF_4_BANK)); 2716 2717 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2718 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2719 2720 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2721 if (reg_offset != 7) 2722 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2723 2724 break; 2725 case CHIP_POLARIS11: 2726 case CHIP_POLARIS12: 2727 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2728 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2729 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2730 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2731 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2732 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2733 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2734 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2735 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2736 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2737 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2738 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2739 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2740 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2741 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2742 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2743 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2744 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2745 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2746 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2747 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2748 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2749 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2750 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2751 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2752 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2753 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2754 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2755 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2756 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2757 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2758 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2759 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2760 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2761 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2762 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2763 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2764 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2765 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2766 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2767 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2768 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2769 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2770 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2771 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2772 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2773 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2774 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2775 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2776 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2777 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2778 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2779 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2780 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2781 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2782 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2783 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2784 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2785 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2786 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2787 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2788 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2789 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2790 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2791 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2792 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2793 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2794 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2795 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2796 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2797 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2798 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2799 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2800 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2801 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2802 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2803 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2804 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2805 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2806 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2807 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2808 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2809 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2810 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2811 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2812 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2813 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2814 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2815 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2816 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2817 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2818 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2819 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2820 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2821 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2822 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2823 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2824 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2825 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2826 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2827 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2828 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2829 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2830 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2831 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2832 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2833 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2834 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2835 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2836 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2837 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2838 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2839 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2840 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2841 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2842 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2843 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2844 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2845 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2846 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2847 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2848 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2849 2850 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2851 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2852 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2853 NUM_BANKS(ADDR_SURF_16_BANK)); 2854 2855 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2856 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2857 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2858 NUM_BANKS(ADDR_SURF_16_BANK)); 2859 2860 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2861 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2862 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2863 NUM_BANKS(ADDR_SURF_16_BANK)); 2864 2865 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2866 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2867 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2868 NUM_BANKS(ADDR_SURF_16_BANK)); 2869 2870 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2871 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2872 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2873 NUM_BANKS(ADDR_SURF_16_BANK)); 2874 2875 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2876 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2877 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2878 NUM_BANKS(ADDR_SURF_16_BANK)); 2879 2880 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2881 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2882 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2883 NUM_BANKS(ADDR_SURF_16_BANK)); 2884 2885 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2886 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2887 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2888 NUM_BANKS(ADDR_SURF_16_BANK)); 2889 2890 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2891 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2892 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2893 NUM_BANKS(ADDR_SURF_16_BANK)); 2894 2895 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2896 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2897 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2898 NUM_BANKS(ADDR_SURF_16_BANK)); 2899 2900 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2901 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2902 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2903 NUM_BANKS(ADDR_SURF_16_BANK)); 2904 2905 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2906 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2907 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2908 
NUM_BANKS(ADDR_SURF_16_BANK)); 2909 2910 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2911 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2912 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2913 NUM_BANKS(ADDR_SURF_8_BANK)); 2914 2915 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2916 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2917 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2918 NUM_BANKS(ADDR_SURF_4_BANK)); 2919 2920 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2921 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2922 2923 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2924 if (reg_offset != 7) 2925 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2926 2927 break; 2928 case CHIP_POLARIS10: 2929 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2930 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2931 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2932 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2933 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2934 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2935 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2936 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2937 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2938 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2939 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2940 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2941 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2942 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2943 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2944 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2945 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2946 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2947 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2948 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2949 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2950 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2951 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2952 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2953 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2954 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2955 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2956 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2957 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2958 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2959 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2960 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2961 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2962 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2963 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2964 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2965 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2966 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2967 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2968 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2969 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2970 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2971 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2972 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2973 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2974 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2975 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2976 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2977 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2978 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2979 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2980 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2981 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2982 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2983 
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
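	/*
	 * Stoney, like Carrizo below, presumably has a 2-pipe gfx block
	 * (every entry uses the ADDR_SURF_P2 pipe config), and the unused
	 * tile-mode slots 7, 12, 17 and 23 are never programmed.
	 */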
	case CHIP_STONEY:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	default:
		dev_warn(adev->dev,
			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
			 adev->asic_type);

	case CHIP_CARRIZO:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	}
}

static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
				  u32 se_num, u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32(mmGRBM_GFX_INDEX, data);
}

static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_RB_BACKEND_DISABLE) |
		RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}
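/*
 * Per-ASIC PA_SC_RASTER_CONFIG{,_1} values: the RB/packer/SE mapping
 * fields differ with the number of render backends and shader engines
 * each chip actually has.
 */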
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}

static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
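/*
 * Read back which render backends survived harvesting, cache the result
 * for userspace, and program the (possibly harvested) raster config.
 */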
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
	    adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

/**
 * gfx_v8_0_init_compute_vmid - init the compute VMIDs' SH_MEM registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	default:
		adev->gfx.config.double_offchip_lds_buf = 1;
		break;
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		adev->gfx.config.double_offchip_lds_buf = 0;
		break;
	}
}
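/*
 * One-time init of the gfx block: tiling tables, RB and CU state, the
 * per-VMID SH_MEM apertures, and the SC FIFO sizes.
 */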
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
					  SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  INDEX_STRIDE, 3);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
		PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
		PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
		PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
		PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}

static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
	       adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
	       adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
	       adev->gfx.rlc.clear_state_size);
}

static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}
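/*
 * Stream the RLC save/restore lists into SRM ARAM and GPM scratch. The
 * format list appears to use 0xFFFFFFFF as an end-of-entry marker; each
 * indexed register is rewritten to reference one of the
 * RLC_SRM_INDEX_CNTL_ADDR/DATA pairs programmed at the end of this
 * function.
 */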
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
	       adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets) / sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
	       adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets) / sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
		       indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}

static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}

static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
}

static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
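/* Note the inverted sense: enabling CP power gating clears CP_PG_DISABLE. */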
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}

static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}
}

static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* APUs like Carrizo only enable the CP interrupt after the CP has
	 * been initialized, so skip it here for them. */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}

static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
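/* Halt or un-halt the gfx CP micro engines (ME, PFP and CE) together. */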
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}

static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}

static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
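/*
 * Prime gfx ring 0 with the clear-state preamble: context control, the
 * SECT_CONTEXT extents from vi_cs_data, the per-ASIC raster config and
 * the CE partition bases.
 */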
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				  0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}

static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;

	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			    DOORBELL_RANGE_LOWER,
			    AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
	       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
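/*
 * Bring up gfx ring 0: program CP_RB0 size and pointers, the rptr/wptr
 * writeback addresses and the doorbell, then start and test the ring.
 */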
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}

static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	udelay(50);
}

static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data + i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run
	 * different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data + i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}

/* KIQ functions */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
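/*
 * Have the KIQ map all enabled compute queues: one SET_RESOURCES packet
 * carrying the queue mask, then one MAP_QUEUES packet per KCQ, followed
 * by a scratch-register write that is polled for completion.
 */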
If so, the 4566 * definition of queue_mask needs updating */ 4567 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 4568 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 4569 break; 4570 } 4571 4572 queue_mask |= (1ull << i); 4573 } 4574 4575 r = amdgpu_gfx_scratch_get(adev, &scratch); 4576 if (r) { 4577 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 4578 return r; 4579 } 4580 WREG32(scratch, 0xCAFEDEAD); 4581 4582 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); 4583 if (r) { 4584 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 4585 amdgpu_gfx_scratch_free(adev, scratch); 4586 return r; 4587 } 4588 /* set resources */ 4589 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 4590 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ 4591 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 4592 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 4593 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 4594 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 4595 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 4596 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 4597 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4598 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4599 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 4600 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4601 4602 /* map queues */ 4603 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 4604 /* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */ 4605 amdgpu_ring_write(kiq_ring, 4606 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); 4607 amdgpu_ring_write(kiq_ring, 4608 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | 4609 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 4610 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 4611 PACKET3_MAP_QUEUES_ME(ring->me == 1 ?
0 : 1)); /* doorbell */ 4612 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 4613 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 4614 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 4615 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4616 } 4617 /* write to scratch for completion */ 4618 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 4619 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 4620 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 4621 amdgpu_ring_commit(kiq_ring); 4622 4623 for (i = 0; i < adev->usec_timeout; i++) { 4624 tmp = RREG32(scratch); 4625 if (tmp == 0xDEADBEEF) 4626 break; 4627 DRM_UDELAY(1); 4628 } 4629 if (i >= adev->usec_timeout) { 4630 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", 4631 scratch, tmp); 4632 r = -EINVAL; 4633 } 4634 amdgpu_gfx_scratch_free(adev, scratch); 4635 4636 return r; 4637 } 4638 4639 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) 4640 { 4641 int i, r = 0; 4642 4643 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 4644 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); 4645 for (i = 0; i < adev->usec_timeout; i++) { 4646 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 4647 break; 4648 udelay(1); 4649 } 4650 if (i == adev->usec_timeout) 4651 r = -ETIMEDOUT; 4652 } 4653 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 4654 WREG32(mmCP_HQD_PQ_RPTR, 0); 4655 WREG32(mmCP_HQD_PQ_WPTR, 0); 4656 4657 return r; 4658 } 4659 4660 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) 4661 { 4662 struct amdgpu_device *adev = ring->adev; 4663 struct vi_mqd *mqd = ring->mqd_ptr; 4664 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4665 uint32_t tmp; 4666 4667 mqd->header = 0xC0310800; 4668 mqd->compute_pipelinestat_enable = 0x00000001; 4669 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4670 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4671 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4672 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4673 mqd->compute_misc_reserved = 0x00000003; 4674 if (!(adev->flags & AMD_IS_APU)) { 4675 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr 4676 + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); 4677 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr 4678 + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); 4679 } 4680 eop_base_addr = ring->eop_gpu_addr >> 8; 4681 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4682 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4683 4684 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4685 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4686 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4687 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); 4688 4689 mqd->cp_hqd_eop_control = tmp; 4690 4691 /* enable doorbell? */ 4692 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), 4693 CP_HQD_PQ_DOORBELL_CONTROL, 4694 DOORBELL_EN, 4695 ring->use_doorbell ? 
1 : 0); 4696 4697 mqd->cp_hqd_pq_doorbell_control = tmp; 4698 4699 /* set the pointer to the MQD */ 4700 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 4701 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 4702 4703 /* set MQD vmid to 0 */ 4704 tmp = RREG32(mmCP_MQD_CONTROL); 4705 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4706 mqd->cp_mqd_control = tmp; 4707 4708 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */ 4709 hqd_gpu_addr = ring->gpu_addr >> 8; 4710 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4711 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4712 4713 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4714 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4715 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4716 (order_base_2(ring->ring_size / 4) - 1)); 4717 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4718 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4719 #ifdef __BIG_ENDIAN 4720 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4721 #endif 4722 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4723 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4724 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4725 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4726 mqd->cp_hqd_pq_control = tmp; 4727 4728 /* set the wb address whether it's enabled or not */ 4729 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4730 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4731 mqd->cp_hqd_pq_rptr_report_addr_hi = 4732 upper_32_bits(wb_gpu_addr) & 0xffff; 4733 4734 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4735 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4736 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4737 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4738 4739 tmp = 0; 4740 /* enable the doorbell if requested */ 4741 if (ring->use_doorbell) { 4742 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4743 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4744 DOORBELL_OFFSET, ring->doorbell_index); 4745 4746 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4747 DOORBELL_EN, 1); 4748 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4749 DOORBELL_SOURCE, 0); 4750 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4751 DOORBELL_HIT, 0); 4752 } 4753 4754 mqd->cp_hqd_pq_doorbell_control = tmp; 4755 4756 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4757 ring->wptr = 0; 4758 mqd->cp_hqd_pq_wptr = ring->wptr; 4759 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4760 4761 /* set the vmid for the queue */ 4762 mqd->cp_hqd_vmid = 0; 4763 4764 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4765 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4766 mqd->cp_hqd_persistent_state = tmp; 4767 4768 /* set MTYPE */ 4769 tmp = RREG32(mmCP_HQD_IB_CONTROL); 4770 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4771 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); 4772 mqd->cp_hqd_ib_control = tmp; 4773 4774 tmp = RREG32(mmCP_HQD_IQ_TIMER); 4775 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); 4776 mqd->cp_hqd_iq_timer = tmp; 4777 4778 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); 4779 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); 4780 mqd->cp_hqd_ctx_save_control = tmp; 4781 4782 /* defaults */ 4783 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); 4784 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
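/* The "defaults" above and below are snapshots of the live HQD register
 * state rather than hard-coded values: gfx_v8_0_mqd_commit() later replays
 * the whole MQD image register-for-register, so capturing the hardware's
 * current values here keeps that restore path consistent (including the
 * Tonga EOP pointer errata handled in gfx_v8_0_mqd_commit()).
 */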
4785 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); 4786 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); 4787 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); 4788 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); 4789 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); 4790 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); 4791 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); 4792 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); 4793 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); 4794 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); 4795 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); 4796 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); 4797 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); 4798 4799 /* activate the queue */ 4800 mqd->cp_hqd_active = 1; 4801 4802 return 0; 4803 } 4804 4805 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 4806 struct vi_mqd *mqd) 4807 { 4808 uint32_t mqd_reg; 4809 uint32_t *mqd_data; 4810 4811 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ 4812 mqd_data = &mqd->cp_mqd_base_addr_lo; 4813 4814 /* disable wptr polling */ 4815 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); 4816 4817 /* program all HQD registers */ 4818 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++) 4819 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4820 4821 /* Tonga errata: EOP RPTR/WPTR should be left unmodified. 4822 * This is safe since EOP RPTR==WPTR for any inactive HQD 4823 * on ASICs that do not support context-save. 4824 * EOP writes/reads can start anywhere in the ring. 4825 */ 4826 if (adev->asic_type != CHIP_TONGA) { 4827 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); 4828 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); 4829 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); 4830 } 4831 4832 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) 4833 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4834 4835 /* activate the HQD */ 4836 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) 4837 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4838 4839 return 0; 4840 } 4841 4842 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) 4843 { 4844 struct amdgpu_device *adev = ring->adev; 4845 struct vi_mqd *mqd = ring->mqd_ptr; 4846 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 4847 4848 gfx_v8_0_kiq_setting(ring); 4849 4850 if (adev->gfx.in_reset) { /* for GPU_RESET case */ 4851 /* reset MQD to a clean status */ 4852 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4853 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4854 4855 /* reset ring buffer */ 4856 ring->wptr = 0; 4857 amdgpu_ring_clear_ring(ring); 4858 mutex_lock(&adev->srbm_mutex); 4859 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4860 gfx_v8_0_mqd_commit(adev, mqd); 4861 vi_srbm_select(adev, 0, 0, 0, 0); 4862 mutex_unlock(&adev->srbm_mutex); 4863 } else { 4864 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4865 ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; 4866 ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; 4867 mutex_lock(&adev->srbm_mutex); 4868 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4869 gfx_v8_0_mqd_init(ring); 4870 gfx_v8_0_mqd_commit(adev, mqd); 4871 vi_srbm_select(adev, 0, 0, 0, 0); 4872 mutex_unlock(&adev->srbm_mutex); 4873 4874 if 
(adev->gfx.mec.mqd_backup[mqd_idx]) 4875 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4876 } 4877 4878 return 0; 4879 } 4880 4881 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) 4882 { 4883 struct amdgpu_device *adev = ring->adev; 4884 struct vi_mqd *mqd = ring->mqd_ptr; 4885 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4886 4887 if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { 4888 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4889 ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; 4890 ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; 4891 mutex_lock(&adev->srbm_mutex); 4892 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4893 gfx_v8_0_mqd_init(ring); 4894 vi_srbm_select(adev, 0, 0, 0, 0); 4895 mutex_unlock(&adev->srbm_mutex); 4896 4897 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4898 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4899 } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ 4900 /* reset MQD to a clean status */ 4901 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4902 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4903 /* reset ring buffer */ 4904 ring->wptr = 0; 4905 amdgpu_ring_clear_ring(ring); 4906 } else { 4907 amdgpu_ring_clear_ring(ring); 4908 } 4909 return 0; 4910 } 4911 4912 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) 4913 { 4914 if (adev->asic_type > CHIP_TONGA) { 4915 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); 4916 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); 4917 } 4918 /* enable doorbells */ 4919 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4920 } 4921 4922 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) 4923 { 4924 struct amdgpu_ring *ring = NULL; 4925 int r = 0, i; 4926 4927 gfx_v8_0_cp_compute_enable(adev, true); 4928 4929 ring = &adev->gfx.kiq.ring; 4930 4931 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4932 if (unlikely(r != 0)) 4933 goto done; 4934 4935 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4936 if (!r) { 4937 r = gfx_v8_0_kiq_init_queue(ring); 4938 amdgpu_bo_kunmap(ring->mqd_obj); 4939 ring->mqd_ptr = NULL; 4940 } 4941 amdgpu_bo_unreserve(ring->mqd_obj); 4942 if (r) 4943 goto done; 4944 4945 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4946 ring = &adev->gfx.compute_ring[i]; 4947 4948 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4949 if (unlikely(r != 0)) 4950 goto done; 4951 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4952 if (!r) { 4953 r = gfx_v8_0_kcq_init_queue(ring); 4954 amdgpu_bo_kunmap(ring->mqd_obj); 4955 ring->mqd_ptr = NULL; 4956 } 4957 amdgpu_bo_unreserve(ring->mqd_obj); 4958 if (r) 4959 goto done; 4960 } 4961 4962 gfx_v8_0_set_mec_doorbell_range(adev); 4963 4964 r = gfx_v8_0_kiq_kcq_enable(adev); 4965 if (r) 4966 goto done; 4967 4968 /* Test KIQ */ 4969 ring = &adev->gfx.kiq.ring; 4970 ring->ready = true; 4971 r = amdgpu_ring_test_ring(ring); 4972 if (r) { 4973 ring->ready = false; 4974 goto done; 4975 } 4976 4977 /* Test KCQs */ 4978 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4979 ring = &adev->gfx.compute_ring[i]; 4980 ring->ready = true; 4981 r = amdgpu_ring_test_ring(ring); 4982 if (r) 4983 ring->ready = false; 4984 } 4985 4986 done: 4987 return r; 4988 } 4989 4990 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 4991 { 4992 int r; 4993 4994 if (!(adev->flags & AMD_IS_APU)) 4995 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4996 4997 
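/* Two firmware paths exist below: with legacy (direct) loading the driver
 * writes the CE/PFP/ME and MEC images into the CP UCODE registers itself,
 * while with SMU-managed loading it only polls check_fw_load_finish() for
 * each firmware ID. Topaz is the mixed case: even when the SMU handles
 * CE/PFP/ME, the MEC image is still loaded directly.
 */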
if (!adev->pp_enabled) { 4998 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) { 4999 /* legacy firmware loading */ 5000 r = gfx_v8_0_cp_gfx_load_microcode(adev); 5001 if (r) 5002 return r; 5003 5004 r = gfx_v8_0_cp_compute_load_microcode(adev); 5005 if (r) 5006 return r; 5007 } else { 5008 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5009 AMDGPU_UCODE_ID_CP_CE); 5010 if (r) 5011 return -EINVAL; 5012 5013 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5014 AMDGPU_UCODE_ID_CP_PFP); 5015 if (r) 5016 return -EINVAL; 5017 5018 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5019 AMDGPU_UCODE_ID_CP_ME); 5020 if (r) 5021 return -EINVAL; 5022 5023 if (adev->asic_type == CHIP_TOPAZ) { 5024 r = gfx_v8_0_cp_compute_load_microcode(adev); 5025 if (r) 5026 return r; 5027 } else { 5028 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5029 AMDGPU_UCODE_ID_CP_MEC1); 5030 if (r) 5031 return -EINVAL; 5032 } 5033 } 5034 } 5035 5036 r = gfx_v8_0_cp_gfx_resume(adev); 5037 if (r) 5038 return r; 5039 5040 r = gfx_v8_0_kiq_resume(adev); 5041 if (r) 5042 return r; 5043 5044 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5045 5046 return 0; 5047 } 5048 5049 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 5050 { 5051 gfx_v8_0_cp_gfx_enable(adev, enable); 5052 gfx_v8_0_cp_compute_enable(adev, enable); 5053 } 5054 5055 static int gfx_v8_0_hw_init(void *handle) 5056 { 5057 int r; 5058 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5059 5060 gfx_v8_0_init_golden_registers(adev); 5061 gfx_v8_0_gpu_init(adev); 5062 5063 r = gfx_v8_0_rlc_resume(adev); 5064 if (r) 5065 return r; 5066 5067 r = gfx_v8_0_cp_resume(adev); 5068 5069 return r; 5070 } 5071 5072 static int gfx_v8_0_hw_fini(void *handle) 5073 { 5074 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5075 5076 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5077 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5078 if (amdgpu_sriov_vf(adev)) { 5079 pr_debug("For SRIOV client, shouldn't do anything.\n"); 5080 return 0; 5081 } 5082 gfx_v8_0_cp_enable(adev, false); 5083 gfx_v8_0_rlc_stop(adev); 5084 5085 amdgpu_set_powergating_state(adev, 5086 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); 5087 5088 return 0; 5089 } 5090 5091 static int gfx_v8_0_suspend(void *handle) 5092 { 5093 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5094 adev->gfx.in_suspend = true; 5095 return gfx_v8_0_hw_fini(adev); 5096 } 5097 5098 static int gfx_v8_0_resume(void *handle) 5099 { 5100 int r; 5101 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5102 5103 r = gfx_v8_0_hw_init(adev); 5104 adev->gfx.in_suspend = false; 5105 return r; 5106 } 5107 5108 static bool gfx_v8_0_is_idle(void *handle) 5109 { 5110 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5111 5112 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5113 return false; 5114 else 5115 return true; 5116 } 5117 5118 static int gfx_v8_0_wait_for_idle(void *handle) 5119 { 5120 unsigned i; 5121 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5122 5123 for (i = 0; i < adev->usec_timeout; i++) { 5124 if (gfx_v8_0_is_idle(handle)) 5125 return 0; 5126 5127 udelay(1); 5128 } 5129 return -ETIMEDOUT; 5130 } 5131 5132 static bool gfx_v8_0_check_soft_reset(void *handle) 5133 { 5134 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5135 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5136 u32 tmp; 5137 5138 /* GRBM_STATUS */ 5139 tmp = RREG32(mmGRBM_STATUS); 5140 if (tmp & 
(GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5141 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5142 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5143 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5144 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5145 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5146 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 5147 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5148 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 5149 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5150 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 5151 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5152 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5153 } 5154 5155 /* GRBM_STATUS2 */ 5156 tmp = RREG32(mmGRBM_STATUS2); 5157 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 5158 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5159 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 5160 5161 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 5162 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 5163 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 5164 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5165 SOFT_RESET_CPF, 1); 5166 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5167 SOFT_RESET_CPC, 1); 5168 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5169 SOFT_RESET_CPG, 1); 5170 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 5171 SOFT_RESET_GRBM, 1); 5172 } 5173 5174 /* SRBM_STATUS */ 5175 tmp = RREG32(mmSRBM_STATUS); 5176 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 5177 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5178 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5179 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 5180 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5181 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5182 5183 if (grbm_soft_reset || srbm_soft_reset) { 5184 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5185 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5186 return true; 5187 } else { 5188 adev->gfx.grbm_soft_reset = 0; 5189 adev->gfx.srbm_soft_reset = 0; 5190 return false; 5191 } 5192 } 5193 5194 static int gfx_v8_0_pre_soft_reset(void *handle) 5195 { 5196 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5197 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5198 5199 if ((!adev->gfx.grbm_soft_reset) && 5200 (!adev->gfx.srbm_soft_reset)) 5201 return 0; 5202 5203 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5204 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5205 5206 /* stop the rlc */ 5207 gfx_v8_0_rlc_stop(adev); 5208 5209 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5210 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5211 /* Disable GFX parsing/prefetching */ 5212 gfx_v8_0_cp_gfx_enable(adev, false); 5213 5214 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5215 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5216 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5217 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5218 int i; 5219 5220 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5221 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5222 5223 mutex_lock(&adev->srbm_mutex); 5224 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5225 gfx_v8_0_deactivate_hqd(adev, 2); 5226 vi_srbm_select(adev, 0, 0, 0, 0); 5227 mutex_unlock(&adev->srbm_mutex); 5228 } 5229 /* Disable MEC parsing/prefetching */ 5230 
gfx_v8_0_cp_compute_enable(adev, false); 5231 } 5232 5233 return 0; 5234 } 5235 5236 static int gfx_v8_0_soft_reset(void *handle) 5237 { 5238 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5239 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5240 u32 tmp; 5241 5242 if ((!adev->gfx.grbm_soft_reset) && 5243 (!adev->gfx.srbm_soft_reset)) 5244 return 0; 5245 5246 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5247 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5248 5249 if (grbm_soft_reset || srbm_soft_reset) { 5250 tmp = RREG32(mmGMCON_DEBUG); 5251 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5252 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5253 WREG32(mmGMCON_DEBUG, tmp); 5254 udelay(50); 5255 } 5256 5257 if (grbm_soft_reset) { 5258 tmp = RREG32(mmGRBM_SOFT_RESET); 5259 tmp |= grbm_soft_reset; 5260 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5261 WREG32(mmGRBM_SOFT_RESET, tmp); 5262 tmp = RREG32(mmGRBM_SOFT_RESET); 5263 5264 udelay(50); 5265 5266 tmp &= ~grbm_soft_reset; 5267 WREG32(mmGRBM_SOFT_RESET, tmp); 5268 tmp = RREG32(mmGRBM_SOFT_RESET); 5269 } 5270 5271 if (srbm_soft_reset) { 5272 tmp = RREG32(mmSRBM_SOFT_RESET); 5273 tmp |= srbm_soft_reset; 5274 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5275 WREG32(mmSRBM_SOFT_RESET, tmp); 5276 tmp = RREG32(mmSRBM_SOFT_RESET); 5277 5278 udelay(50); 5279 5280 tmp &= ~srbm_soft_reset; 5281 WREG32(mmSRBM_SOFT_RESET, tmp); 5282 tmp = RREG32(mmSRBM_SOFT_RESET); 5283 } 5284 5285 if (grbm_soft_reset || srbm_soft_reset) { 5286 tmp = RREG32(mmGMCON_DEBUG); 5287 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5288 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5289 WREG32(mmGMCON_DEBUG, tmp); 5290 } 5291 5292 /* Wait a little for things to settle down */ 5293 udelay(50); 5294 5295 return 0; 5296 } 5297 5298 static int gfx_v8_0_post_soft_reset(void *handle) 5299 { 5300 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5301 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5302 5303 if ((!adev->gfx.grbm_soft_reset) && 5304 (!adev->gfx.srbm_soft_reset)) 5305 return 0; 5306 5307 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5308 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5309 5310 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5311 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5312 gfx_v8_0_cp_gfx_resume(adev); 5313 5314 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5315 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5316 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5317 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5318 int i; 5319 5320 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5321 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5322 5323 mutex_lock(&adev->srbm_mutex); 5324 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5325 gfx_v8_0_deactivate_hqd(adev, 2); 5326 vi_srbm_select(adev, 0, 0, 0, 0); 5327 mutex_unlock(&adev->srbm_mutex); 5328 } 5329 gfx_v8_0_kiq_resume(adev); 5330 } 5331 gfx_v8_0_rlc_start(adev); 5332 5333 return 0; 5334 } 5335 5336 /** 5337 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5338 * 5339 * @adev: amdgpu_device pointer 5340 * 5341 * Fetches a GPU clock counter snapshot. 5342 * Returns the 64 bit clock counter snapshot. 
5343 */ 5344 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5345 { 5346 uint64_t clock; 5347 5348 mutex_lock(&adev->gfx.gpu_clock_mutex); 5349 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5350 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5351 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5352 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5353 return clock; 5354 } 5355 5356 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5357 uint32_t vmid, 5358 uint32_t gds_base, uint32_t gds_size, 5359 uint32_t gws_base, uint32_t gws_size, 5360 uint32_t oa_base, uint32_t oa_size) 5361 { 5362 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 5363 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 5364 5365 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 5366 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 5367 5368 oa_base = oa_base >> AMDGPU_OA_SHIFT; 5369 oa_size = oa_size >> AMDGPU_OA_SHIFT; 5370 5371 /* GDS Base */ 5372 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5373 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5374 WRITE_DATA_DST_SEL(0))); 5375 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5376 amdgpu_ring_write(ring, 0); 5377 amdgpu_ring_write(ring, gds_base); 5378 5379 /* GDS Size */ 5380 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5381 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5382 WRITE_DATA_DST_SEL(0))); 5383 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5384 amdgpu_ring_write(ring, 0); 5385 amdgpu_ring_write(ring, gds_size); 5386 5387 /* GWS */ 5388 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5389 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5390 WRITE_DATA_DST_SEL(0))); 5391 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5392 amdgpu_ring_write(ring, 0); 5393 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5394 5395 /* OA */ 5396 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5397 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5398 WRITE_DATA_DST_SEL(0))); 5399 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5400 amdgpu_ring_write(ring, 0); 5401 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5402 } 5403 5404 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5405 { 5406 WREG32(mmSQ_IND_INDEX, 5407 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5408 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5409 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5410 (SQ_IND_INDEX__FORCE_READ_MASK)); 5411 return RREG32(mmSQ_IND_DATA); 5412 } 5413 5414 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5415 uint32_t wave, uint32_t thread, 5416 uint32_t regno, uint32_t num, uint32_t *out) 5417 { 5418 WREG32(mmSQ_IND_INDEX, 5419 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5420 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5421 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5422 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5423 (SQ_IND_INDEX__FORCE_READ_MASK) | 5424 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5425 while (num--) 5426 *(out++) = RREG32(mmSQ_IND_DATA); 5427 } 5428 5429 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5430 { 5431 /* type 0 wave data */ 5432 dst[(*no_fields)++] = 0; 5433 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5434 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5435 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, 
ixSQ_WAVE_PC_HI); 5436 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5437 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5438 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5439 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5440 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5441 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5442 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5443 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5444 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5445 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5446 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5447 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5448 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5449 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5450 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5451 } 5452 5453 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5454 uint32_t wave, uint32_t start, 5455 uint32_t size, uint32_t *dst) 5456 { 5457 wave_read_regs( 5458 adev, simd, wave, 0, 5459 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5460 } 5461 5462 5463 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5464 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5465 .select_se_sh = &gfx_v8_0_select_se_sh, 5466 .read_wave_data = &gfx_v8_0_read_wave_data, 5467 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5468 }; 5469 5470 static int gfx_v8_0_early_init(void *handle) 5471 { 5472 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5473 5474 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5475 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 5476 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5477 gfx_v8_0_set_ring_funcs(adev); 5478 gfx_v8_0_set_irq_funcs(adev); 5479 gfx_v8_0_set_gds_init(adev); 5480 gfx_v8_0_set_rlc_funcs(adev); 5481 5482 return 0; 5483 } 5484 5485 static int gfx_v8_0_late_init(void *handle) 5486 { 5487 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5488 int r; 5489 5490 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5491 if (r) 5492 return r; 5493 5494 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5495 if (r) 5496 return r; 5497 5498 /* requires IBs so do in late init after IB pool is initialized */ 5499 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5500 if (r) 5501 return r; 5502 5503 amdgpu_set_powergating_state(adev, 5504 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); 5505 5506 return 0; 5507 } 5508 5509 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5510 bool enable) 5511 { 5512 if ((adev->asic_type == CHIP_POLARIS11) || 5513 (adev->asic_type == CHIP_POLARIS12)) 5514 /* Send msg to SMU via Powerplay */ 5515 amdgpu_set_powergating_state(adev, 5516 AMD_IP_BLOCK_TYPE_SMC, 5517 enable ? 5518 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); 5519 5520 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5521 } 5522 5523 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5524 bool enable) 5525 { 5526 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 
1 : 0); 5527 } 5528 5529 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5530 bool enable) 5531 { 5532 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5533 } 5534 5535 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5536 bool enable) 5537 { 5538 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5539 } 5540 5541 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5542 bool enable) 5543 { 5544 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5545 5546 /* Read any GFX register to wake up GFX. */ 5547 if (!enable) 5548 RREG32(mmDB_RENDER_CONTROL); 5549 } 5550 5551 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5552 bool enable) 5553 { 5554 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5555 cz_enable_gfx_cg_power_gating(adev, true); 5556 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5557 cz_enable_gfx_pipeline_power_gating(adev, true); 5558 } else { 5559 cz_enable_gfx_cg_power_gating(adev, false); 5560 cz_enable_gfx_pipeline_power_gating(adev, false); 5561 } 5562 } 5563 5564 static int gfx_v8_0_set_powergating_state(void *handle, 5565 enum amd_powergating_state state) 5566 { 5567 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5568 bool enable = (state == AMD_PG_STATE_GATE); 5569 5570 if (amdgpu_sriov_vf(adev)) 5571 return 0; 5572 5573 switch (adev->asic_type) { 5574 case CHIP_CARRIZO: 5575 case CHIP_STONEY: 5576 5577 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5578 cz_enable_sck_slow_down_on_power_up(adev, true); 5579 cz_enable_sck_slow_down_on_power_down(adev, true); 5580 } else { 5581 cz_enable_sck_slow_down_on_power_up(adev, false); 5582 cz_enable_sck_slow_down_on_power_down(adev, false); 5583 } 5584 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5585 cz_enable_cp_power_gating(adev, true); 5586 else 5587 cz_enable_cp_power_gating(adev, false); 5588 5589 cz_update_gfx_cg_power_gating(adev, enable); 5590 5591 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5592 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5593 else 5594 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5595 5596 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5597 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5598 else 5599 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5600 break; 5601 case CHIP_POLARIS11: 5602 case CHIP_POLARIS12: 5603 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5604 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5605 else 5606 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5607 5608 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5609 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5610 else 5611 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5612 5613 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) 5614 polaris11_enable_gfx_quick_mg_power_gating(adev, true); 5615 else 5616 polaris11_enable_gfx_quick_mg_power_gating(adev, false); 5617 break; 5618 default: 5619 break; 5620 } 5621 5622 return 0; 5623 } 5624 5625 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) 5626 { 5627 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5628 int data; 5629 5630 if (amdgpu_sriov_vf(adev)) 5631 *flags = 0; 5632 5633 /* AMD_CG_SUPPORT_GFX_MGCG */ 5634 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5635 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) 5636 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5637 
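/* RLC_CGCG_CGLS_CTRL carries both the CGCG and CGLS enable bits, so the
 * single register read below feeds the next two flag checks;
 * CGTS_SM_CTRL_REG is shared the same way for CGTS and CGTS_LS.
 */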
5638 /* AMD_CG_SUPPORT_GFX_CGCG */ 5639 data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5640 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5641 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5642 5643 /* AMD_CG_SUPPORT_GFX_CGLS */ 5644 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5645 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5646 5647 /* AMD_CG_SUPPORT_GFX_CGTS */ 5648 data = RREG32(mmCGTS_SM_CTRL_REG); 5649 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK)) 5650 *flags |= AMD_CG_SUPPORT_GFX_CGTS; 5651 5652 /* AMD_CG_SUPPORT_GFX_CGTS_LS */ 5653 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK)) 5654 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS; 5655 5656 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5657 data = RREG32(mmRLC_MEM_SLP_CNTL); 5658 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5659 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5660 5661 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5662 data = RREG32(mmCP_MEM_SLP_CNTL); 5663 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5664 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5665 } 5666 5667 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5668 uint32_t reg_addr, uint32_t cmd) 5669 { 5670 uint32_t data; 5671 5672 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5673 5674 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5675 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5676 5677 data = RREG32(mmRLC_SERDES_WR_CTRL); 5678 if (adev->asic_type == CHIP_STONEY) 5679 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5680 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5681 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5682 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5683 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5684 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5685 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5686 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5687 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5688 else 5689 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5690 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5691 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5692 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5693 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5694 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5695 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5696 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5697 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5698 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5699 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5700 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5701 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5702 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5703 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5704 5705 WREG32(mmRLC_SERDES_WR_CTRL, data); 5706 } 5707 5708 #define MSG_ENTER_RLC_SAFE_MODE 1 5709 #define MSG_EXIT_RLC_SAFE_MODE 0 5710 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5711 #define RLC_GPR_REG2__REQ__SHIFT 0 5712 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5713 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5714 5715 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5716 { 5717 u32 data; 5718 unsigned i; 5719 5720 data = RREG32(mmRLC_CNTL); 5721 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5722 return; 5723 5724 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5725 data |= RLC_SAFE_MODE__CMD_MASK; 5726 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5727 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5728 WREG32(mmRLC_SAFE_MODE, data); 5729 5730 for (i = 0; i < adev->usec_timeout; i++) { 5731 if ((RREG32(mmRLC_GPM_STAT) & 5732 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5733 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5734
(RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5735 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5736 break; 5737 udelay(1); 5738 } 5739 5740 for (i = 0; i < adev->usec_timeout; i++) { 5741 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5742 break; 5743 udelay(1); 5744 } 5745 adev->gfx.rlc.in_safe_mode = true; 5746 } 5747 } 5748 5749 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5750 { 5751 u32 data = 0; 5752 unsigned i; 5753 5754 data = RREG32(mmRLC_CNTL); 5755 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5756 return; 5757 5758 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5759 if (adev->gfx.rlc.in_safe_mode) { 5760 data |= RLC_SAFE_MODE__CMD_MASK; 5761 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5762 WREG32(mmRLC_SAFE_MODE, data); 5763 adev->gfx.rlc.in_safe_mode = false; 5764 } 5765 } 5766 5767 for (i = 0; i < adev->usec_timeout; i++) { 5768 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5769 break; 5770 udelay(1); 5771 } 5772 } 5773 5774 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5775 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5776 .exit_safe_mode = iceland_exit_rlc_safe_mode 5777 }; 5778 5779 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5780 bool enable) 5781 { 5782 uint32_t temp, data; 5783 5784 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5785 5786 /* It is disabled by HW by default */ 5787 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5788 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5789 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 5790 /* 1 - RLC memory Light sleep */ 5791 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 5792 5793 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 5794 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 5795 } 5796 5797 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5798 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5799 if (adev->flags & AMD_IS_APU) 5800 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5801 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5802 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5803 else 5804 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5805 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5806 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5807 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5808 5809 if (temp != data) 5810 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5811 5812 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5813 gfx_v8_0_wait_for_rlc_serdes(adev); 5814 5815 /* 5 - clear mgcg override */ 5816 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5817 5818 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5819 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 5820 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5821 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5822 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5823 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 5824 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 5825 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 5826 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 5827 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 5828 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 5829 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 5830 if (temp != data) 5831 WREG32(mmCGTS_SM_CTRL_REG, data); 5832 } 5833 udelay(50); 5834 5835 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5836 gfx_v8_0_wait_for_rlc_serdes(adev); 5837 } else { 5838 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 5839 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 
5840 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5841 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5842 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5843 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5844 if (temp != data) 5845 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5846 5847 /* 2 - disable MGLS in RLC */ 5848 data = RREG32(mmRLC_MEM_SLP_CNTL); 5849 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5850 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5851 WREG32(mmRLC_MEM_SLP_CNTL, data); 5852 } 5853 5854 /* 3 - disable MGLS in CP */ 5855 data = RREG32(mmCP_MEM_SLP_CNTL); 5856 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5857 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5858 WREG32(mmCP_MEM_SLP_CNTL, data); 5859 } 5860 5861 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 5862 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5863 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 5864 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 5865 if (temp != data) 5866 WREG32(mmCGTS_SM_CTRL_REG, data); 5867 5868 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5869 gfx_v8_0_wait_for_rlc_serdes(adev); 5870 5871 /* 6 - set mgcg override */ 5872 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5873 5874 udelay(50); 5875 5876 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5877 gfx_v8_0_wait_for_rlc_serdes(adev); 5878 } 5879 5880 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5881 } 5882 5883 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5884 bool enable) 5885 { 5886 uint32_t temp, temp1, data, data1; 5887 5888 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5889 5890 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5891 5892 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5893 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5894 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 5895 if (temp1 != data1) 5896 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5897 5898 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5899 gfx_v8_0_wait_for_rlc_serdes(adev); 5900 5901 /* 2 - clear cgcg override */ 5902 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5903 5904 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5905 gfx_v8_0_wait_for_rlc_serdes(adev); 5906 5907 /* 3 - write cmd to set CGLS */ 5908 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 5909 5910 /* 4 - enable cgcg */ 5911 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5912 5913 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5914 /* enable cgls */ 5915 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5916 5917 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5918 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 5919 5920 if (temp1 != data1) 5921 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5922 } else { 5923 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5924 } 5925 5926 if (temp != data) 5927 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5928 5929 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/ 5930 * Cmp_busy/GFX_Idle interrupts 5931 */ 5932 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5933 } else { 5934 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 5935 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5936 5937 /* TEST CGCG */ 5938 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5939 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 5940 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 5941 if (temp1 != data1) 5942 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5943 5944 /* read gfx register to wake up cgcg */ 5945 RREG32(mmCB_CGTT_SCLK_CTRL); 5946
RREG32(mmCB_CGTT_SCLK_CTRL); 5947 RREG32(mmCB_CGTT_SCLK_CTRL); 5948 RREG32(mmCB_CGTT_SCLK_CTRL); 5949 5950 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5951 gfx_v8_0_wait_for_rlc_serdes(adev); 5952 5953 /* write cmd to Set CGCG Override */ 5954 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5955 5956 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5957 gfx_v8_0_wait_for_rlc_serdes(adev); 5958 5959 /* write cmd to Clear CGLS */ 5960 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 5961 5962 /* disable cgcg, cgls should be disabled too. */ 5963 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 5964 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5965 if (temp != data) 5966 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5967 /* enable interrupts again for PG */ 5968 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5969 } 5970 5971 gfx_v8_0_wait_for_rlc_serdes(adev); 5972 5973 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5974 } 5975 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5976 bool enable) 5977 { 5978 if (enable) { 5979 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 5980 * === MGCG + MGLS + TS(CG/LS) === 5981 */ 5982 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5983 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5984 } else { 5985 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 5986 * === CGCG + CGLS === 5987 */ 5988 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5989 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5990 } 5991 return 0; 5992 } 5993 5994 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 5995 enum amd_clockgating_state state) 5996 { 5997 uint32_t msg_id, pp_state = 0; 5998 uint32_t pp_support_state = 0; 5999 void *pp_handle = adev->powerplay.pp_handle; 6000 6001 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6002 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6003 pp_support_state = PP_STATE_SUPPORT_LS; 6004 pp_state = PP_STATE_LS; 6005 } 6006 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6007 pp_support_state |= PP_STATE_SUPPORT_CG; 6008 pp_state |= PP_STATE_CG; 6009 } 6010 if (state == AMD_CG_STATE_UNGATE) 6011 pp_state = 0; 6012 6013 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6014 PP_BLOCK_GFX_CG, 6015 pp_support_state, 6016 pp_state); 6017 amd_set_clockgating_by_smu(pp_handle, msg_id); 6018 } 6019 6020 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6021 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6022 pp_support_state = PP_STATE_SUPPORT_LS; 6023 pp_state = PP_STATE_LS; 6024 } 6025 6026 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6027 pp_support_state |= PP_STATE_SUPPORT_CG; 6028 pp_state |= PP_STATE_CG; 6029 } 6030 6031 if (state == AMD_CG_STATE_UNGATE) 6032 pp_state = 0; 6033 6034 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6035 PP_BLOCK_GFX_MG, 6036 pp_support_state, 6037 pp_state); 6038 amd_set_clockgating_by_smu(pp_handle, msg_id); 6039 } 6040 6041 return 0; 6042 } 6043 6044 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 6045 enum amd_clockgating_state state) 6046 { 6047 6048 uint32_t msg_id, pp_state = 0; 6049 uint32_t pp_support_state = 0; 6050 void *pp_handle = adev->powerplay.pp_handle; 6051 6052 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6053 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6054 pp_support_state = PP_STATE_SUPPORT_LS; 6055 pp_state = PP_STATE_LS; 6056 }
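/* As in the Tonga variant above, LS and CG requests for one block are
 * accumulated into a single pp_state/pp_support_state pair and sent to
 * the SMU as one PP_CG_MSG_ID() message per block.
 */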
6057 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6058 pp_support_state |= PP_STATE_SUPPORT_CG; 6059 pp_state |= PP_STATE_CG; 6060 } 6061 if (state == AMD_CG_STATE_UNGATE) 6062 pp_state = 0; 6063 6064 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6065 PP_BLOCK_GFX_CG, 6066 pp_support_state, 6067 pp_state); 6068 amd_set_clockgating_by_smu(pp_handle, msg_id); 6069 } 6070 6071 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 6072 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 6073 pp_support_state = PP_STATE_SUPPORT_LS; 6074 pp_state = PP_STATE_LS; 6075 } 6076 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 6077 pp_support_state |= PP_STATE_SUPPORT_CG; 6078 pp_state |= PP_STATE_CG; 6079 } 6080 if (state == AMD_CG_STATE_UNGATE) 6081 pp_state = 0; 6082 6083 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6084 PP_BLOCK_GFX_3D, 6085 pp_support_state, 6086 pp_state); 6087 amd_set_clockgating_by_smu(pp_handle, msg_id); 6088 } 6089 6090 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6091 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6092 pp_support_state = PP_STATE_SUPPORT_LS; 6093 pp_state = PP_STATE_LS; 6094 } 6095 6096 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6097 pp_support_state |= PP_STATE_SUPPORT_CG; 6098 pp_state |= PP_STATE_CG; 6099 } 6100 6101 if (state == AMD_CG_STATE_UNGATE) 6102 pp_state = 0; 6103 6104 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6105 PP_BLOCK_GFX_MG, 6106 pp_support_state, 6107 pp_state); 6108 amd_set_clockgating_by_smu(pp_handle, msg_id); 6109 } 6110 6111 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 6112 pp_support_state = PP_STATE_SUPPORT_LS; 6113 6114 if (state == AMD_CG_STATE_UNGATE) 6115 pp_state = 0; 6116 else 6117 pp_state = PP_STATE_LS; 6118 6119 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6120 PP_BLOCK_GFX_RLC, 6121 pp_support_state, 6122 pp_state); 6123 amd_set_clockgating_by_smu(pp_handle, msg_id); 6124 } 6125 6126 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 6127 pp_support_state = PP_STATE_SUPPORT_LS; 6128 6129 if (state == AMD_CG_STATE_UNGATE) 6130 pp_state = 0; 6131 else 6132 pp_state = PP_STATE_LS; 6133 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6134 PP_BLOCK_GFX_CP, 6135 pp_support_state, 6136 pp_state); 6137 amd_set_clockgating_by_smu(pp_handle, msg_id); 6138 } 6139 6140 return 0; 6141 } 6142 6143 static int gfx_v8_0_set_clockgating_state(void *handle, 6144 enum amd_clockgating_state state) 6145 { 6146 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6147 6148 if (amdgpu_sriov_vf(adev)) 6149 return 0; 6150 6151 switch (adev->asic_type) { 6152 case CHIP_FIJI: 6153 case CHIP_CARRIZO: 6154 case CHIP_STONEY: 6155 gfx_v8_0_update_gfx_clock_gating(adev, 6156 state == AMD_CG_STATE_GATE); 6157 break; 6158 case CHIP_TONGA: 6159 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6160 break; 6161 case CHIP_POLARIS10: 6162 case CHIP_POLARIS11: 6163 case CHIP_POLARIS12: 6164 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6165 break; 6166 default: 6167 break; 6168 } 6169 return 0; 6170 } 6171 6172 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6173 { 6174 return ring->adev->wb.wb[ring->rptr_offs]; 6175 } 6176 6177 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6178 { 6179 struct amdgpu_device *adev = ring->adev; 6180 6181 if (ring->use_doorbell) 6182 /* XXX check if swapping is necessary on BE */ 6183 return ring->adev->wb.wb[ring->wptr_offs]; 6184 else 6185 return RREG32(mmCP_RB0_WPTR); 6186 } 6187 6188 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 
6189 { 6190 struct amdgpu_device *adev = ring->adev; 6191 6192 if (ring->use_doorbell) { 6193 /* XXX check if swapping is necessary on BE */ 6194 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6195 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6196 } else { 6197 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 6198 (void)RREG32(mmCP_RB0_WPTR); 6199 } 6200 } 6201 6202 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 6203 { 6204 u32 ref_and_mask, reg_mem_engine; 6205 6206 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || 6207 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { 6208 switch (ring->me) { 6209 case 1: 6210 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6211 break; 6212 case 2: 6213 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6214 break; 6215 default: 6216 return; 6217 } 6218 reg_mem_engine = 0; 6219 } else { 6220 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6221 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6222 } 6223 6224 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6225 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6226 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6227 reg_mem_engine)); 6228 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 6229 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6230 amdgpu_ring_write(ring, ref_and_mask); 6231 amdgpu_ring_write(ring, ref_and_mask); 6232 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6233 } 6234 6235 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) 6236 { 6237 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6238 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) | 6239 EVENT_INDEX(4)); 6240 6241 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6242 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) | 6243 EVENT_INDEX(0)); 6244 } 6245 6246 6247 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) 6248 { 6249 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6250 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6251 WRITE_DATA_DST_SEL(0) | 6252 WR_CONFIRM)); 6253 amdgpu_ring_write(ring, mmHDP_DEBUG0); 6254 amdgpu_ring_write(ring, 0); 6255 amdgpu_ring_write(ring, 1); 6256 6257 } 6258 6259 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6260 struct amdgpu_ib *ib, 6261 unsigned vm_id, bool ctx_switch) 6262 { 6263 u32 header, control = 0; 6264 6265 if (ib->flags & AMDGPU_IB_FLAG_CE) 6266 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 6267 else 6268 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 6269 6270 control |= ib->length_dw | (vm_id << 24); 6271 6272 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 6273 control |= INDIRECT_BUFFER_PRE_ENB(1); 6274 6275 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 6276 gfx_v8_0_ring_emit_de_meta(ring); 6277 } 6278 6279 amdgpu_ring_write(ring, header); 6280 amdgpu_ring_write(ring, 6281 #ifdef __BIG_ENDIAN 6282 (2 << 0) | 6283 #endif 6284 (ib->gpu_addr & 0xFFFFFFFC)); 6285 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6286 amdgpu_ring_write(ring, control); 6287 } 6288 6289 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 6290 struct amdgpu_ib *ib, 6291 unsigned vm_id, bool ctx_switch) 6292 { 6293 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); 6294 6295 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 6296 amdgpu_ring_write(ring, 6297 #ifdef __BIG_ENDIAN 6298 (2 << 0) | 6299 #endif 6300 (ib->gpu_addr & 0xFFFFFFFC)); 6301 
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts 0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}
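
/*
 * Compute rings always run off a doorbell: the new wptr is mirrored into
 * the writeback page and then rung through the doorbell aperture, so no
 * MMIO register write is needed (unlike the gfx ring above, which falls
 * back to mmCP_RB0_WPTR when doorbells are not in use).
 */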
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
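
/*
 * CONTEXT_CONTROL: dw2 selects which state blocks the CP reloads. The
 * load_enable bit (bit 31) must be set, otherwise the whole packet is
 * treated as a NOP; the remaining load_* bits are itemized below.
 */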
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time the preamble
		 * is presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* dummy value, patched later */
	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
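
/*
 * Register access helpers used by the KIQ under SR-IOV: emit_rreg copies
 * a register into the writeback slot reserved at adev->virt.reg_val_offs
 * via COPY_DATA, and emit_wreg writes a register through WRITE_DATA.
 * This lets a VF reach registers it cannot access by direct MMIO.
 */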
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register */
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
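
/*
 * IH cookie layout assumed by the handlers below:
 * entry->ring_id bits 1:0 = pipe, bits 3:2 = me (0 = gfx, 1/2 = MEC),
 * bits 6:4 = queue.
 */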
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupts are supported for MEC starting
			 * from VI, but they can only be enabled/disabled per
			 * pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* the KIQ only supports GENERIC2_INT for now */
		break;
	}
	return 0;
}

static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
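
/*
 * emit_frame_size must cover the worst case number of DWORDs a single
 * frame can add to the ring; the per-packet costs are itemized in the
 * comments below so the ring layer can reserve enough space up front.
 */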
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if counting 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
			the first COND_EXEC jumps to the place just
			prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
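
/*
 * The KIQ reuses the compute rptr/wptr and IB helpers but has its own
 * fence path plus the rreg/wreg hooks; it carries no vm_flush or
 * pipeline_sync callbacks since it is only used for queue management and
 * register access (mainly under SR-IOV), not for user submissions.
 */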
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC GDS info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
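
/*
 * Walk every SE/SH pair, apply the user disable masks, and record both
 * the active-CU bitmap and an "always on" subset of at most ao_cu_num
 * CUs per SH, which is presumably consumed by the power management code.
 */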
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	static union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
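
/*
 * Like the CE metadata above, the DE metadata lives in the reserved CSA
 * (two pages below AMDGPU_VA_RESERVED_SIZE), with the GDS backup page
 * right behind it. Both are only emitted under SR-IOV, where the CP
 * presumably uses them to save state across preemption.
 */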
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	static union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}