/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)		((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)		((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)		((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)		((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)		((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)		((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)		((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14

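/*
 * CP/RLC microcode images requested at init time.  MODULE_FIRMWARE()
 * records each file name in the module info section so that firmware
 * packaging tools (e.g. initramfs generators) know to bundle them.
 */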
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

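/*
 * The "golden" register tables below are consumed by
 * amdgpu_program_register_sequence() as {register, AND mask, OR value}
 * triplets: the masked bits of the current register value are cleared
 * and the OR value is written in (a mask of 0xffffffff writes the
 * value outright, skipping the read).
 */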
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);

static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

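/*
 * Basic sanity check for a ring: seed a scratch register with
 * 0xCAFEDEAD, emit a SET_UCONFIG_REG packet that writes 0xDEADBEEF to
 * it through the CP, and poll until the value lands, proving the ring
 * can fetch and execute packets.
 */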
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

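/*
 * Same idea as the ring test, but the scratch write is issued from an
 * indirect buffer and completion is observed through the IB's fence,
 * which also exercises IB submission and fence signalling.
 */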
out.\n"); 838 r = -ETIMEDOUT; 839 goto err2; 840 } else if (r < 0) { 841 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 842 goto err2; 843 } 844 tmp = RREG32(scratch); 845 if (tmp == 0xDEADBEEF) { 846 DRM_INFO("ib test on ring %d succeeded\n", ring->idx); 847 r = 0; 848 } else { 849 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", 850 scratch, tmp); 851 r = -EINVAL; 852 } 853 err2: 854 amdgpu_ib_free(adev, &ib, NULL); 855 dma_fence_put(f); 856 err1: 857 amdgpu_gfx_scratch_free(adev, scratch); 858 return r; 859 } 860 861 862 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) { 863 release_firmware(adev->gfx.pfp_fw); 864 adev->gfx.pfp_fw = NULL; 865 release_firmware(adev->gfx.me_fw); 866 adev->gfx.me_fw = NULL; 867 release_firmware(adev->gfx.ce_fw); 868 adev->gfx.ce_fw = NULL; 869 release_firmware(adev->gfx.rlc_fw); 870 adev->gfx.rlc_fw = NULL; 871 release_firmware(adev->gfx.mec_fw); 872 adev->gfx.mec_fw = NULL; 873 if ((adev->asic_type != CHIP_STONEY) && 874 (adev->asic_type != CHIP_TOPAZ)) 875 release_firmware(adev->gfx.mec2_fw); 876 adev->gfx.mec2_fw = NULL; 877 878 kfree(adev->gfx.rlc.register_list_format); 879 } 880 881 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) 882 { 883 const char *chip_name; 884 char fw_name[30]; 885 int err; 886 struct amdgpu_firmware_info *info = NULL; 887 const struct common_firmware_header *header = NULL; 888 const struct gfx_firmware_header_v1_0 *cp_hdr; 889 const struct rlc_firmware_header_v2_0 *rlc_hdr; 890 unsigned int *tmp = NULL, i; 891 892 DRM_DEBUG("\n"); 893 894 switch (adev->asic_type) { 895 case CHIP_TOPAZ: 896 chip_name = "topaz"; 897 break; 898 case CHIP_TONGA: 899 chip_name = "tonga"; 900 break; 901 case CHIP_CARRIZO: 902 chip_name = "carrizo"; 903 break; 904 case CHIP_FIJI: 905 chip_name = "fiji"; 906 break; 907 case CHIP_POLARIS11: 908 chip_name = "polaris11"; 909 break; 910 case CHIP_POLARIS10: 911 chip_name = "polaris10"; 912 break; 913 case CHIP_POLARIS12: 914 chip_name = "polaris12"; 915 break; 916 case CHIP_STONEY: 917 chip_name = "stoney"; 918 break; 919 default: 920 BUG(); 921 } 922 923 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 924 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 925 if (err) 926 goto out; 927 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 928 if (err) 929 goto out; 930 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 931 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 932 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 933 934 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 935 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 936 if (err) 937 goto out; 938 err = amdgpu_ucode_validate(adev->gfx.me_fw); 939 if (err) 940 goto out; 941 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 942 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 943 944 /* chain ib ucode isn't formal released, just disable it by far 945 * TODO: when ucod ready we should use ucode version to judge if 946 * chain-ib support or not. 
	adev->virt.chained_ib_support = false;

	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
		le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the JT (jump table) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}

	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

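/*
 * Build the clear state buffer (CSB): a PM4 stream that, between
 * PREAMBLE_BEGIN/END_CLEAR_STATE packets, programs every SECT_CONTEXT
 * register extent from the VI clear state data, restores the
 * harvested raster config, and finishes with a CLEAR_STATE packet.
 */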
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

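/*
 * Copy the jump table of each CP microcode image (CE, PFP, ME, MEC,
 * plus MEC2 on Carrizo) back to back into the RLC cp_table buffer,
 * which the RLC uses when saving/restoring CP state for power gating.
 */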
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}

static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  struct amdgpu_irq_src *irq)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	int r = 0;

	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
	if (r)
		return r;

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
	if (adev->gfx.mec2_fw) {
		ring->me = 2;
		ring->pipe = 0;
	} else {
		ring->me = 1;
		ring->pipe = 1;
	}

	ring->queue = 0;
	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024,
			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}

static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq)
{
	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
	amdgpu_ring_fini(ring);
}

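/*
 * EOP (end of pipe) buffer backing for the compute queues: every
 * queue gets a MEC_HPD_SIZE chunk of the hpd_eop object created
 * below.  Only one MEC pipe is claimed here; the remaining pipes are
 * left to the KFD.
 */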
"(%d) pin HDP EOP bo failed\n", r); 1452 gfx_v8_0_mec_fini(adev); 1453 return r; 1454 } 1455 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); 1456 if (r) { 1457 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); 1458 gfx_v8_0_mec_fini(adev); 1459 return r; 1460 } 1461 1462 memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE); 1463 1464 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1465 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1466 1467 return 0; 1468 } 1469 1470 static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) 1471 { 1472 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 1473 1474 amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); 1475 } 1476 1477 static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) 1478 { 1479 int r; 1480 u32 *hpd; 1481 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 1482 1483 r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, 1484 AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, 1485 &kiq->eop_gpu_addr, (void **)&hpd); 1486 if (r) { 1487 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); 1488 return r; 1489 } 1490 1491 memset(hpd, 0, MEC_HPD_SIZE); 1492 1493 r = amdgpu_bo_reserve(kiq->eop_obj, false); 1494 if (unlikely(r != 0)) 1495 dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); 1496 amdgpu_bo_kunmap(kiq->eop_obj); 1497 amdgpu_bo_unreserve(kiq->eop_obj); 1498 1499 return 0; 1500 } 1501 1502 static const u32 vgpr_init_compute_shader[] = 1503 { 1504 0x7e000209, 0x7e020208, 1505 0x7e040207, 0x7e060206, 1506 0x7e080205, 0x7e0a0204, 1507 0x7e0c0203, 0x7e0e0202, 1508 0x7e100201, 0x7e120200, 1509 0x7e140209, 0x7e160208, 1510 0x7e180207, 0x7e1a0206, 1511 0x7e1c0205, 0x7e1e0204, 1512 0x7e200203, 0x7e220202, 1513 0x7e240201, 0x7e260200, 1514 0x7e280209, 0x7e2a0208, 1515 0x7e2c0207, 0x7e2e0206, 1516 0x7e300205, 0x7e320204, 1517 0x7e340203, 0x7e360202, 1518 0x7e380201, 0x7e3a0200, 1519 0x7e3c0209, 0x7e3e0208, 1520 0x7e400207, 0x7e420206, 1521 0x7e440205, 0x7e460204, 1522 0x7e480203, 0x7e4a0202, 1523 0x7e4c0201, 0x7e4e0200, 1524 0x7e500209, 0x7e520208, 1525 0x7e540207, 0x7e560206, 1526 0x7e580205, 0x7e5a0204, 1527 0x7e5c0203, 0x7e5e0202, 1528 0x7e600201, 0x7e620200, 1529 0x7e640209, 0x7e660208, 1530 0x7e680207, 0x7e6a0206, 1531 0x7e6c0205, 0x7e6e0204, 1532 0x7e700203, 0x7e720202, 1533 0x7e740201, 0x7e760200, 1534 0x7e780209, 0x7e7a0208, 1535 0x7e7c0207, 0x7e7e0206, 1536 0xbf8a0000, 0xbf810000, 1537 }; 1538 1539 static const u32 sgpr_init_compute_shader[] = 1540 { 1541 0xbe8a0100, 0xbe8c0102, 1542 0xbe8e0104, 0xbe900106, 1543 0xbe920108, 0xbe940100, 1544 0xbe960102, 0xbe980104, 1545 0xbe9a0106, 0xbe9c0108, 1546 0xbe9e0100, 0xbea00102, 1547 0xbea20104, 0xbea40106, 1548 0xbea60108, 0xbea80100, 1549 0xbeaa0102, 0xbeac0104, 1550 0xbeae0106, 0xbeb00108, 1551 0xbeb20100, 0xbeb40102, 1552 0xbeb60104, 0xbeb80106, 1553 0xbeba0108, 0xbebc0100, 1554 0xbebe0102, 0xbec00104, 1555 0xbec20106, 0xbec40108, 1556 0xbec60100, 0xbec80102, 1557 0xbee60004, 0xbee70005, 1558 0xbeea0006, 0xbeeb0007, 1559 0xbee80008, 0xbee90009, 1560 0xbefc0000, 0xbf8a0000, 1561 0xbf810000, 0x00000000, 1562 }; 1563 1564 static const u32 vgpr_init_regs[] = 1565 { 1566 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff, 1567 mmCOMPUTE_RESOURCE_LIMITS, 0, 1568 mmCOMPUTE_NUM_THREAD_X, 256*4, 1569 mmCOMPUTE_NUM_THREAD_Y, 1, 1570 mmCOMPUTE_NUM_THREAD_Z, 1, 1571 mmCOMPUTE_PGM_RSRC2, 20, 1572 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1573 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1574 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1575 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1576 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, MEC_HPD_SIZE);

	r = amdgpu_bo_reserve(kiq->eop_obj, false);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}

static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
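
/*
 * The two arrays above are raw GCN machine code. The first appears to
 * be a chain of v_mov_b32 instructions fanning the COMPUTE_USER_DATA
 * SGPRs out across all 64 VGPRs, the second an s_mov_b32 chain walking
 * the SGPRs; both seem to end with s_barrier (0xbf8a0000) and s_endpgm
 * (0xbf810000). That decoding is a best-effort reading of the opcodes,
 * not something stated in this file.
 */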
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
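
/*
 * IB sizing sketch for the workaround below: each reg/value pair in
 * the init tables above costs 3 dwords (SET_SH_REG header, offset,
 * value), and each dispatch adds 4 dwords for COMPUTE_PGM_LO/HI, 5
 * for DISPATCH_DIRECT and 2 for the EVENT_WRITE, all times 4 bytes
 * per dword - hence the (ARRAY_SIZE() / 2) * 3 + 4 + 5 + 2 terms in
 * total_size.
 */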
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
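
	/*
	 * The SGPR pass is issued twice with different
	 * COMPUTE_STATIC_THREAD_MGMT_SE0 masks (0x0f above via
	 * sgpr1_init_regs, 0xf0 below via sgpr2_init_regs), so between
	 * them both halves of the CU enable mask get exercised; the
	 * EVENT_TYPE(7) / EVENT_INDEX(4) pair appears to be a CS
	 * partial flush separating the dispatches.
	 */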
	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
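
/*
 * gfx_v8_0_gpu_early_init() fills in the per-ASIC topology (shader
 * engines, tile pipes, CUs per SH, backends per SE) and picks the
 * golden GB_ADDR_CONFIG; on Carrizo and Stoney the PCI revision ID
 * distinguishes the harvest configurations (B4/B6/B8/B10 on Carrizo).
 */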
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}
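
	/*
	 * mem_row_size_in_kb below comes from two places: on APUs from
	 * the fused DIMM address maps (a map value of 11 means an 8GB
	 * DIMM and therefore a 2KB row), on dGPUs from
	 * MC_ARB_RAMCFG.NOOFCOLS. As a worked example of the dGPU
	 * formula, NOOFCOLS = 0 gives 4 * 2^8 / 1024 = 1KB and
	 * NOOFCOLS = 2 gives 4KB, which is also the cap.
	 */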
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM is installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;
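
	/*
	 * GB_ADDR_CONFIG.ROW_SIZE is a log2-style field, so the switch
	 * below maps the 1KB/2KB/4KB rows computed above onto field
	 * values 0/1/2 in the golden gb_addr_config.
	 */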
	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}

static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     irq_type);
		if (r)
			return r;
	}

	if (amdgpu_sriov_vf(adev)) {
		r = gfx_v8_0_kiq_init(adev);
		if (r) {
			DRM_ERROR("Failed to init KIQ BOs!\n");
			return r;
		}

		kiq = &adev->gfx.kiq;
		r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
		if (r)
			return r;

		/* create MQDs for all compute queues as well as the KIQ for the SR-IOV case */
		r = gfx_v8_0_compute_mqd_sw_init(adev);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}

static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	if (amdgpu_sriov_vf(adev)) {
		gfx_v8_0_compute_mqd_sw_fini(adev);
		gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
		gfx_v8_0_kiq_fini(adev);
	}

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	gfx_v8_0_free_microcode(adev);

	return 0;
}

static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2270 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2271 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2272 PIPE_CONFIG(ADDR_SURF_P2) | 2273 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2274 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2275 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2276 PIPE_CONFIG(ADDR_SURF_P2) | 2277 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2278 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2279 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2280 PIPE_CONFIG(ADDR_SURF_P2) | 2281 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2282 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2283 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2284 PIPE_CONFIG(ADDR_SURF_P2) | 2285 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2286 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2287 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2288 PIPE_CONFIG(ADDR_SURF_P2)); 2289 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2290 PIPE_CONFIG(ADDR_SURF_P2) | 2291 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2292 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2293 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2294 PIPE_CONFIG(ADDR_SURF_P2) | 2295 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2296 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2297 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2298 PIPE_CONFIG(ADDR_SURF_P2) | 2299 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2301 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2302 PIPE_CONFIG(ADDR_SURF_P2) | 2303 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2304 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2305 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2306 PIPE_CONFIG(ADDR_SURF_P2) | 2307 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2308 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2309 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2310 PIPE_CONFIG(ADDR_SURF_P2) | 2311 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2313 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2314 PIPE_CONFIG(ADDR_SURF_P2) | 2315 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2317 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2318 PIPE_CONFIG(ADDR_SURF_P2) | 2319 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2321 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2322 PIPE_CONFIG(ADDR_SURF_P2) | 2323 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2325 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2326 PIPE_CONFIG(ADDR_SURF_P2) | 2327 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2329 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2330 PIPE_CONFIG(ADDR_SURF_P2) | 2331 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2333 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2334 PIPE_CONFIG(ADDR_SURF_P2) | 2335 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2336 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2337 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2338 PIPE_CONFIG(ADDR_SURF_P2) | 2339 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2341 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2342 PIPE_CONFIG(ADDR_SURF_P2) | 2343 
MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2345 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2346 PIPE_CONFIG(ADDR_SURF_P2) | 2347 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2349 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2350 PIPE_CONFIG(ADDR_SURF_P2) | 2351 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2353 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2354 PIPE_CONFIG(ADDR_SURF_P2) | 2355 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2357 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2358 PIPE_CONFIG(ADDR_SURF_P2) | 2359 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2361 2362 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2363 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2364 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2365 NUM_BANKS(ADDR_SURF_8_BANK)); 2366 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2367 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2368 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2369 NUM_BANKS(ADDR_SURF_8_BANK)); 2370 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2371 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2372 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2373 NUM_BANKS(ADDR_SURF_8_BANK)); 2374 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2375 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2376 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2377 NUM_BANKS(ADDR_SURF_8_BANK)); 2378 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2379 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2380 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2381 NUM_BANKS(ADDR_SURF_8_BANK)); 2382 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2383 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2384 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2385 NUM_BANKS(ADDR_SURF_8_BANK)); 2386 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2387 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2388 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2389 NUM_BANKS(ADDR_SURF_8_BANK)); 2390 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2391 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2392 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2393 NUM_BANKS(ADDR_SURF_16_BANK)); 2394 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2395 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2396 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2397 NUM_BANKS(ADDR_SURF_16_BANK)); 2398 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2399 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2400 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2401 NUM_BANKS(ADDR_SURF_16_BANK)); 2402 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2403 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2404 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2405 NUM_BANKS(ADDR_SURF_16_BANK)); 2406 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2407 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2408 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2409 NUM_BANKS(ADDR_SURF_16_BANK)); 2410 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2413 NUM_BANKS(ADDR_SURF_16_BANK)); 2414 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2417 NUM_BANKS(ADDR_SURF_8_BANK)); 2418 2419 for (reg_offset = 
0; reg_offset < num_tile_mode_states; reg_offset++) 2420 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2421 reg_offset != 23) 2422 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2423 2424 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2425 if (reg_offset != 7) 2426 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2427 2428 break; 2429 case CHIP_FIJI: 2430 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2431 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2432 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2433 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2434 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2435 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2436 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2437 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2438 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2439 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2440 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2441 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2442 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2443 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2444 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2446 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2447 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2448 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2449 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2450 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2451 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2452 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2453 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2454 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2455 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2456 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2457 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2458 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2459 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2460 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2461 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2462 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2463 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2464 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2465 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2466 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2468 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2469 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2470 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2472 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2473 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2474 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2476 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2477 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2478 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2480 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2481 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2482 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2484 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2485 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2486 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2488 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2489 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2490 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 
2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2492 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2493 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2494 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2496 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2497 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2498 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2500 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2501 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2502 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2503 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2504 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2505 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2506 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2507 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2508 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2509 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2510 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2511 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2512 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2513 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2514 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2515 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2516 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2517 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2518 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2519 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2520 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2521 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2522 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2523 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2524 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2525 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2526 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2527 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2528 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2529 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2530 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2531 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2532 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2533 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2534 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2535 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2536 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2537 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2538 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2539 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2540 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2541 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2542 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2543 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2544 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2545 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2546 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2548 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2549 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2550 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2552 2553 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2554 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2555 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2556 NUM_BANKS(ADDR_SURF_8_BANK)); 2557 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2558 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2559 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2560 NUM_BANKS(ADDR_SURF_8_BANK)); 2561 mod2array[2] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2562 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2563 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2564 NUM_BANKS(ADDR_SURF_8_BANK)); 2565 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2566 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2567 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2568 NUM_BANKS(ADDR_SURF_8_BANK)); 2569 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2570 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2571 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2572 NUM_BANKS(ADDR_SURF_8_BANK)); 2573 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2574 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2575 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2576 NUM_BANKS(ADDR_SURF_8_BANK)); 2577 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2578 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2579 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2580 NUM_BANKS(ADDR_SURF_8_BANK)); 2581 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2582 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2583 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2584 NUM_BANKS(ADDR_SURF_8_BANK)); 2585 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2586 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2587 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2588 NUM_BANKS(ADDR_SURF_8_BANK)); 2589 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2590 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2591 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2592 NUM_BANKS(ADDR_SURF_8_BANK)); 2593 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2594 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2595 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2596 NUM_BANKS(ADDR_SURF_8_BANK)); 2597 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2598 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2599 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2600 NUM_BANKS(ADDR_SURF_8_BANK)); 2601 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2602 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2603 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2604 NUM_BANKS(ADDR_SURF_8_BANK)); 2605 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2606 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2607 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2608 NUM_BANKS(ADDR_SURF_4_BANK)); 2609 2610 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2611 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2612 2613 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2614 if (reg_offset != 7) 2615 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2616 2617 break; 2618 case CHIP_TONGA: 2619 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2620 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2621 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2622 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2623 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2624 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2625 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2626 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2627 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2628 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2629 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2630 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2631 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2632 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2633 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2634 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2635 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2636 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2637 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2638 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2639 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2640 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2641 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2642 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2643 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2644 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2645 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2646 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2647 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2648 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2649 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2650 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2651 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2652 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2653 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2654 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2655 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2656 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2657 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2658 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2659 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2660 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2661 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2662 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2663 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2664 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2665 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2666 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2667 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2668 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2669 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2670 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2671 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2672 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2673 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2674 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2675 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2676 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2677 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2678 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2679 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2680 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2681 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2682 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2683 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2684 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2685 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2686 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2687 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2688 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2689 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2690 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2691 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2692 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2693 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2694 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2695 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2696 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2697 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2698 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2699 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2700 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2701 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2702 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2703 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2704 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2705 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2706 
PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2707 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2708 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2709 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2710 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2711 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2712 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2713 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2714 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2715 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2716 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2717 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2718 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2719 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2720 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2721 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2722 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2723 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2725 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2726 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2727 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2728 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2729 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2730 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2731 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2732 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2733 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2734 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2735 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2736 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2737 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2738 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2739 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2740 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2741 2742 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2743 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2744 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2745 NUM_BANKS(ADDR_SURF_16_BANK)); 2746 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2747 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2748 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2749 NUM_BANKS(ADDR_SURF_16_BANK)); 2750 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2751 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2752 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2753 NUM_BANKS(ADDR_SURF_16_BANK)); 2754 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2755 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2756 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2757 NUM_BANKS(ADDR_SURF_16_BANK)); 2758 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2759 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2760 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2761 NUM_BANKS(ADDR_SURF_16_BANK)); 2762 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2763 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2764 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2765 NUM_BANKS(ADDR_SURF_16_BANK)); 2766 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2767 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2768 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2769 NUM_BANKS(ADDR_SURF_16_BANK)); 2770 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2771 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2772 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2773 NUM_BANKS(ADDR_SURF_16_BANK)); 2774 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2775 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2776 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2777 NUM_BANKS(ADDR_SURF_16_BANK)); 2778 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2779 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2780 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2781 NUM_BANKS(ADDR_SURF_16_BANK)); 2782 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2783 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2784 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2785 NUM_BANKS(ADDR_SURF_16_BANK)); 2786 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2787 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2788 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2789 NUM_BANKS(ADDR_SURF_8_BANK)); 2790 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2791 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2792 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2793 NUM_BANKS(ADDR_SURF_4_BANK)); 2794 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2795 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2796 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2797 NUM_BANKS(ADDR_SURF_4_BANK)); 2798 2799 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2800 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2801 2802 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2803 if (reg_offset != 7) 2804 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2805 2806 break; 2807 case CHIP_POLARIS11: 2808 case CHIP_POLARIS12: 2809 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2810 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2811 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2812 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2813 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2814 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2815 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2816 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2817 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2818 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2819 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2820 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2821 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2822 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2823 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2824 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2825 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2826 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2827 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2828 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2829 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2830 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2831 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2832 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2833 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2834 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2835 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2836 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2837 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2838 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2839 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2840 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2841 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2842 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2843 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2844 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2845 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2846 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2847 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2848 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2849 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2850 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2851 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2852 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2853 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2854 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2855 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2856 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2857 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2859 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2860 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2861 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2863 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2864 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2865 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2866 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2867 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2868 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2869 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2870 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2871 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2872 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2873 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2875 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2876 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2877 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2879 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2880 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2881 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2882 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2883 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2884 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2885 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2886 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2887 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2888 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2889 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2890 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2891 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2892 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2893 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2894 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2895 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2896 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2897 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2898 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2899 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2900 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2901 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2902 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2903 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2904 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2905 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2906 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2907 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2908 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2909 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2910 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2911 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2912 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2913 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2914 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2915 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2916 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2917 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2918 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2919 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2920 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2921 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2922 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2923 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2924 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2925 
MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2926 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2927 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2928 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2929 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2930 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2931 2932 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2933 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2934 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2935 NUM_BANKS(ADDR_SURF_16_BANK)); 2936 2937 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2938 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2939 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2940 NUM_BANKS(ADDR_SURF_16_BANK)); 2941 2942 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2943 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2944 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2945 NUM_BANKS(ADDR_SURF_16_BANK)); 2946 2947 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2948 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2949 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2950 NUM_BANKS(ADDR_SURF_16_BANK)); 2951 2952 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2953 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2954 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2955 NUM_BANKS(ADDR_SURF_16_BANK)); 2956 2957 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2958 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2959 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2960 NUM_BANKS(ADDR_SURF_16_BANK)); 2961 2962 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2963 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2964 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2965 NUM_BANKS(ADDR_SURF_16_BANK)); 2966 2967 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2968 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2969 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2970 NUM_BANKS(ADDR_SURF_16_BANK)); 2971 2972 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2973 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2974 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2975 NUM_BANKS(ADDR_SURF_16_BANK)); 2976 2977 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2978 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2979 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2980 NUM_BANKS(ADDR_SURF_16_BANK)); 2981 2982 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2983 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2984 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2985 NUM_BANKS(ADDR_SURF_16_BANK)); 2986 2987 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2988 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2989 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2990 NUM_BANKS(ADDR_SURF_16_BANK)); 2991 2992 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2993 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2994 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2995 NUM_BANKS(ADDR_SURF_8_BANK)); 2996 2997 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2998 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2999 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3000 NUM_BANKS(ADDR_SURF_4_BANK)); 3001 3002 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3003 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3004 3005 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3006 if (reg_offset != 7) 3007 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3008 3009 break; 3010 case CHIP_POLARIS10: 3011 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3012 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3013 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3014 
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
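	/*
	 * Stoney is a 2-pipe (P2) part; slots 7, 12, 17 and 23 are never
	 * filled in and are skipped when the table is written out below.
	 */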
	case CHIP_STONEY:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 &&
			    reg_offset != 17 && reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
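	/*
	 * Deliberately no break before the default label: unknown parts warn
	 * and then fall through to reuse the Carrizo tables.
	 */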
	default:
		dev_warn(adev->dev,
			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
			 adev->asic_type);

	case CHIP_CARRIZO:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 &&
			    reg_offset != 17 && reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	}
}

static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
				  u32 se_num, u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32(mmGRBM_GFX_INDEX, data);
}
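/*
 * Passing 0xffffffff for se_num, sh_num or instance selects broadcast
 * mode in GRBM_GFX_INDEX, so a single register write reaches every
 * shader engine / shader array / instance at once.
 */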
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	/* 1ULL keeps the shift well defined even for bit_width == 32 */
	return (u32)((1ULL << bit_width) - 1);
}

static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_RB_BACKEND_DISABLE) |
		RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
				       adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}

static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
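/*
 * On parts with harvested (fused-off) render backends the default raster
 * config above no longer matches the working RBs, so a per-SE config is
 * derived from the active-RB mask below instead.
 */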
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
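/*
 * Build a single bitmap of all active RBs by walking every SE/SH pair
 * and packing each per-SH bitmap at its offset within the word.
 */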
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
	    adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
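/*
 * Note: SH_MEM_BASES holds the upper address bits (63:48) of the private
 * and shared apertures, so the 0x6000 base below selects the
 * 0x6000'0000'0000'0000 region that the aperture layout in the function
 * describes. VMIDs 8..15 are the ones handed out for compute.
 */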
/**
 * gfx_v8_0_init_compute_vmid - init the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the sh_mem registers of the compute VMIDs.
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	default:
		adev->gfx.config.double_offchip_lds_buf = 1;
		break;
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		adev->gfx.config.double_offchip_lds_buf = 0;
		break;
	}
}

static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
					  SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  INDEX_STRIDE, 3);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcast
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
		PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
		PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
		PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
		PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);
}
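/*
 * Poll until the RLC serdes units report idle on every SE/SH and then on
 * the non-CU masters; used when stopping the RLC so no serdes transaction
 * is still in flight.
 */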
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}

static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
	       adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
	       adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
	       adev->gfx.rlc.clear_state_size);
}

static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
					int ind_offset,
					int list_size,
					int *unique_indices,
					int *indices_count,
					int max_indices,
					int *ind_start_offsets,
					int *offset_count,
					int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
		     indices < *indices_count;
		     indices++) {
			if (unique_indices[indices] ==
			    register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}
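/*
 * The RLC register-list blob starts with RLC_FormatDirectRegListLength
 * direct entries; the indirect entries that follow are variable length
 * and 0xFFFFFFFF-terminated, which is what the parser above walks to
 * collect the unique indices and per-entry start offsets used below.
 */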
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
	       adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				    RLC_FormatDirectRegListLength,
				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				    unique_indices,
				    &indices_count,
				    sizeof(unique_indices) / sizeof(int),
				    indirect_start_offsets,
				    &offset_count,
				    sizeof(indirect_start_offsets) / sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
	       adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets) / sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
		       indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}

static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}

static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
}

static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
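/*
 * Note the inverted sense below: CP_PG_DISABLE is cleared to enable CP
 * power gating and set to disable it.
 */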
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}

static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}
}

static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* APUs (e.g. Carrizo) enable the CP interrupt only after the CP is initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
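/*
 * Legacy (non-SMU) microcode load: reset the ucode address to 0, stream
 * the firmware words through the DATA register, then write the firmware
 * version back to the ADDR register, as the other load paths below do.
 */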
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
				   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}

static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}
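/*
 * The gfx CP is made of three engines, each with its own microcode:
 * PFP (prefetch parser), CE (constant engine) and ME (micro engine).
 * All three are halted via gfx_v8_0_cp_gfx_enable(adev, false) before
 * being reloaded below.
 */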
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}

static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				  0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
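/*
 * Bring up gfx ring 0: program CP_RB0_* with the ring size, rptr/wptr
 * writeback addresses and ring base, optionally route a doorbell, then
 * start the ring and test it.
 */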
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the writeback address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}
	}

	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
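/*
 * MEC1/MEC2 are the compute microengines; halting them via CP_MEC_CNTL
 * also marks every compute ring not ready.
 */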
ring->ready = false; 4554 4555 return r; 4556 } 4557 4558 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 4559 { 4560 int i; 4561 4562 if (enable) { 4563 WREG32(mmCP_MEC_CNTL, 0); 4564 } else { 4565 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 4566 for (i = 0; i < adev->gfx.num_compute_rings; i++) 4567 adev->gfx.compute_ring[i].ready = false; 4568 } 4569 udelay(50); 4570 } 4571 4572 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 4573 { 4574 const struct gfx_firmware_header_v1_0 *mec_hdr; 4575 const __le32 *fw_data; 4576 unsigned i, fw_size; 4577 4578 if (!adev->gfx.mec_fw) 4579 return -EINVAL; 4580 4581 gfx_v8_0_cp_compute_enable(adev, false); 4582 4583 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4584 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4585 4586 fw_data = (const __le32 *) 4587 (adev->gfx.mec_fw->data + 4588 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4589 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4590 4591 /* MEC1 */ 4592 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4593 for (i = 0; i < fw_size; i++) 4594 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4595 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4596 4597 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 4598 if (adev->gfx.mec2_fw) { 4599 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4600 4601 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4602 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4603 4604 fw_data = (const __le32 *) 4605 (adev->gfx.mec2_fw->data + 4606 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4607 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4608 4609 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4610 for (i = 0; i < fw_size; i++) 4611 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4612 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4613 } 4614 4615 return 0; 4616 } 4617 4618 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev) 4619 { 4620 int i, r; 4621 4622 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4623 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4624 4625 if (ring->mqd_obj) { 4626 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4627 if (unlikely(r != 0)) 4628 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); 4629 4630 amdgpu_bo_unpin(ring->mqd_obj); 4631 amdgpu_bo_unreserve(ring->mqd_obj); 4632 4633 amdgpu_bo_unref(&ring->mqd_obj); 4634 ring->mqd_obj = NULL; 4635 ring->mqd_ptr = NULL; 4636 ring->mqd_gpu_addr = 0; 4637 } 4638 } 4639 } 4640 4641 /* KIQ functions */ 4642 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) 4643 { 4644 uint32_t tmp; 4645 struct amdgpu_device *adev = ring->adev; 4646 4647 /* tell RLC which is KIQ queue */ 4648 tmp = RREG32(mmRLC_CP_SCHEDULERS); 4649 tmp &= 0xffffff00; 4650 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4651 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4652 tmp |= 0x80; 4653 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4654 } 4655 4656 static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring) 4657 { 4658 amdgpu_ring_alloc(ring, 8); 4659 /* set resources */ 4660 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 4661 amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ 4662 amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */ 4663 amdgpu_ring_write(ring, 0); /* queue mask hi */ 4664 amdgpu_ring_write(ring, 
0); /* gws mask lo */ 4665 amdgpu_ring_write(ring, 0); /* gws mask hi */ 4666 amdgpu_ring_write(ring, 0); /* oac mask */ 4667 amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */ 4668 amdgpu_ring_commit(ring); 4669 udelay(50); 4670 } 4671 4672 static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring, 4673 struct amdgpu_ring *ring) 4674 { 4675 struct amdgpu_device *adev = kiq_ring->adev; 4676 uint64_t mqd_addr, wptr_addr; 4677 4678 mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 4679 wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4680 amdgpu_ring_alloc(kiq_ring, 8); 4681 4682 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 4683 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 4684 amdgpu_ring_write(kiq_ring, 0x21010000); 4685 amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) | 4686 (ring->queue << 26) | 4687 (ring->pipe << 29) | 4688 ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */ 4689 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 4690 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 4691 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 4692 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4693 amdgpu_ring_commit(kiq_ring); 4694 udelay(50); 4695 } 4696 4697 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) 4698 { 4699 struct amdgpu_device *adev = ring->adev; 4700 struct vi_mqd *mqd = ring->mqd_ptr; 4701 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4702 uint32_t tmp; 4703 4704 mqd->header = 0xC0310800; 4705 mqd->compute_pipelinestat_enable = 0x00000001; 4706 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4707 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4708 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4709 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4710 mqd->compute_misc_reserved = 0x00000003; 4711 4712 eop_base_addr = ring->eop_gpu_addr >> 8; 4713 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4714 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4715 4716 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4717 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4718 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4719 (order_base_2(MEC_HPD_SIZE / 4) - 1)); 4720 4721 mqd->cp_hqd_eop_control = tmp; 4722 4723 /* enable doorbell? 
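(A worked example of the EOP sizing just above, assuming MEC_HPD_SIZE
is 2048 bytes as defined earlier in this file: 2048 / 4 = 512 dwords,
order_base_2(512) = 9, so EOP_SIZE = 8 and the hardware decodes
2^(8+1) = 512 dwords. REG_SET_FIELD() itself is just a masked insert
over the generated sh_mask headers, roughly:
    tmp = (tmp & ~CP_HQD_EOP_CONTROL__EOP_SIZE_MASK) |
          (8 << CP_HQD_EOP_CONTROL__EOP_SIZE__SHIFT);
so only the named field changes and the rest of tmp is preserved.)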
*/ 4724 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4725 4726 if (ring->use_doorbell) 4727 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4728 DOORBELL_EN, 1); 4729 else 4730 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4731 DOORBELL_EN, 0); 4732 4733 mqd->cp_hqd_pq_doorbell_control = tmp; 4734 4735 /* disable the queue if it's active */ 4736 mqd->cp_hqd_dequeue_request = 0; 4737 mqd->cp_hqd_pq_rptr = 0; 4738 mqd->cp_hqd_pq_wptr = 0; 4739 4740 /* set the pointer to the MQD */ 4741 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 4742 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 4743 4744 /* set MQD vmid to 0 */ 4745 tmp = RREG32(mmCP_MQD_CONTROL); 4746 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4747 mqd->cp_mqd_control = tmp; 4748 4749 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */ 4750 hqd_gpu_addr = ring->gpu_addr >> 8; 4751 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4752 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4753 4754 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4755 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4756 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4757 (order_base_2(ring->ring_size / 4) - 1)); 4758 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4759 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4760 #ifdef __BIG_ENDIAN 4761 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4762 #endif 4763 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4764 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4765 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4766 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4767 mqd->cp_hqd_pq_control = tmp; 4768 4769 /* set the wb address whether it's enabled or not */ 4770 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4771 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4772 mqd->cp_hqd_pq_rptr_report_addr_hi = 4773 upper_32_bits(wb_gpu_addr) & 0xffff; 4774 4775 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4776 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4777 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4778 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4779 4780 tmp = 0; 4781 /* enable the doorbell if requested */ 4782 if (ring->use_doorbell) { 4783 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4784 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4785 DOORBELL_OFFSET, ring->doorbell_index); 4786 4787 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4788 DOORBELL_EN, 1); 4789 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4790 DOORBELL_SOURCE, 0); 4791 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4792 DOORBELL_HIT, 0); 4793 } 4794 4795 mqd->cp_hqd_pq_doorbell_control = tmp; 4796 4797 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4798 ring->wptr = 0; 4799 mqd->cp_hqd_pq_wptr = ring->wptr; 4800 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4801 4802 /* set the vmid for the queue */ 4803 mqd->cp_hqd_vmid = 0; 4804 4805 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4806 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4807 mqd->cp_hqd_persistent_state = tmp; 4808 4809 /* activate the queue */ 4810 mqd->cp_hqd_active = 1; 4811 4812 return 0; 4813 } 4814 4815 static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring) 4816 { 4817 struct amdgpu_device *adev = ring->adev; 4818 struct vi_mqd *mqd = ring->mqd_ptr; 4819
uint32_t tmp; 4820 int j; 4821 4822 /* disable wptr polling */ 4823 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); 4824 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4825 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); 4826 4827 WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); 4828 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); 4829 4830 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4831 WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); 4832 4833 /* enable doorbell? */ 4834 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); 4835 4836 /* disable the queue if it's active */ 4837 if (RREG32(mmCP_HQD_ACTIVE) & 1) { 4838 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); 4839 for (j = 0; j < adev->usec_timeout; j++) { 4840 if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) 4841 break; 4842 udelay(1); 4843 } 4844 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); 4845 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); 4846 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 4847 } 4848 4849 /* set the pointer to the MQD */ 4850 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); 4851 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 4852 4853 /* set MQD vmid to 0 */ 4854 WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control); 4855 4856 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */ 4857 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); 4858 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); 4859 4860 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4861 WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); 4862 4863 /* set the wb address whether it's enabled or not */ 4864 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, 4865 mqd->cp_hqd_pq_rptr_report_addr_lo); 4866 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4867 mqd->cp_hqd_pq_rptr_report_addr_hi); 4868 4869 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4870 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); 4871 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); 4872 4873 /* enable the doorbell if requested */ 4874 if (ring->use_doorbell) { 4875 if ((adev->asic_type == CHIP_CARRIZO) || 4876 (adev->asic_type == CHIP_FIJI) || 4877 (adev->asic_type == CHIP_STONEY)) { 4878 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, 4879 AMDGPU_DOORBELL_KIQ << 2); 4880 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, 4881 AMDGPU_DOORBELL_MEC_RING7 << 2); 4882 } 4883 } 4884 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); 4885 4886 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4887 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 4888 4889 /* set the vmid for the queue */ 4890 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); 4891 4892 WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); 4893 4894 /* activate the queue */ 4895 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); 4896 4897 if (ring->use_doorbell) { 4898 tmp = RREG32(mmCP_PQ_STATUS); 4899 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4900 WREG32(mmCP_PQ_STATUS, tmp); 4901 } 4902 4903 return 0; 4904 } 4905 4906 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) 4907 { 4908 struct amdgpu_device *adev = ring->adev; 4909 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 4910 struct vi_mqd *mqd = ring->mqd_ptr; 4911 bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ); 4912 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 4913 4914 if (is_kiq) { 4915 gfx_v8_0_kiq_setting(&kiq->ring); 4916 } else { 4917 mqd_idx = ring - &adev->gfx.compute_ring[0]; 4918 } 4919 4920
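/*
 * mqd_idx picks the CPU-side backup slot for this queue's MQD: compute
 * rings use their index in adev->gfx.compute_ring[], while the KIQ uses
 * the extra slot at AMDGPU_MAX_COMPUTE_RINGS. Sketch of the mapping
 * (illustrative values only): compute ring 3 -> mqd_backup[3],
 * KIQ -> mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]. On first init the fresh
 * MQD is copied out to that slot; on GPU reset it is copied back in so
 * the queue restarts from a known-clean image.
 */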
if (!adev->gfx.in_reset) { 4921 memset((void *)mqd, 0, sizeof(*mqd)); 4922 mutex_lock(&adev->srbm_mutex); 4923 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4924 gfx_v8_0_mqd_init(ring); 4925 if (is_kiq) 4926 gfx_v8_0_kiq_init_register(ring); 4927 vi_srbm_select(adev, 0, 0, 0, 0); 4928 mutex_unlock(&adev->srbm_mutex); 4929 4930 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4931 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4932 } else { /* for GPU_RESET case */ 4933 /* reset MQD to a clean status */ 4934 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4935 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4936 4937 /* reset ring buffer */ 4938 ring->wptr = 0; 4939 amdgpu_ring_clear_ring(ring); 4940 4941 if (is_kiq) { 4942 mutex_lock(&adev->srbm_mutex); 4943 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4944 gfx_v8_0_kiq_init_register(ring); 4945 vi_srbm_select(adev, 0, 0, 0, 0); 4946 mutex_unlock(&adev->srbm_mutex); 4947 } 4948 } 4949 4950 if (is_kiq) 4951 gfx_v8_0_kiq_enable(ring); 4952 else 4953 gfx_v8_0_map_queue_enable(&kiq->ring, ring); 4954 4955 return 0; 4956 } 4957 4958 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) 4959 { 4960 struct amdgpu_ring *ring = NULL; 4961 int r = 0, i; 4962 4963 gfx_v8_0_cp_compute_enable(adev, true); 4964 4965 ring = &adev->gfx.kiq.ring; 4966 4967 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4968 if (unlikely(r != 0)) 4969 goto done; 4970 4971 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4972 if (!r) { 4973 r = gfx_v8_0_kiq_init_queue(ring); 4974 amdgpu_bo_kunmap(ring->mqd_obj); 4975 ring->mqd_ptr = NULL; 4976 } 4977 amdgpu_bo_unreserve(ring->mqd_obj); 4978 if (r) 4979 goto done; 4980 4981 ring->ready = true; 4982 r = amdgpu_ring_test_ring(ring); 4983 if (r) { 4984 ring->ready = false; 4985 goto done; 4986 } 4987 4988 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4989 ring = &adev->gfx.compute_ring[i]; 4990 4991 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4992 if (unlikely(r != 0)) 4993 goto done; 4994 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4995 if (!r) { 4996 r = gfx_v8_0_kiq_init_queue(ring); 4997 amdgpu_bo_kunmap(ring->mqd_obj); 4998 ring->mqd_ptr = NULL; 4999 } 5000 amdgpu_bo_unreserve(ring->mqd_obj); 5001 if (r) 5002 goto done; 5003 5004 ring->ready = true; 5005 r = amdgpu_ring_test_ring(ring); 5006 if (r) 5007 ring->ready = false; 5008 } 5009 5010 done: 5011 return r; 5012 } 5013 5014 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) 5015 { 5016 int r, i, j; 5017 u32 tmp; 5018 bool use_doorbell = true; 5019 u64 hqd_gpu_addr; 5020 u64 mqd_gpu_addr; 5021 u64 eop_gpu_addr; 5022 u64 wb_gpu_addr; 5023 u32 *buf; 5024 struct vi_mqd *mqd; 5025 5026 /* init the queues. 
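Per-ring sketch of the loop below: create and pin a GTT buffer object
for the vi_mqd if one does not exist yet, map it, fill the struct on
the CPU, then mirror each field into the matching CP_HQD_* register
through the SRBM-selected window (vi_srbm_select() routes the register
accesses to this ring's <me, pipe, queue>).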
*/ 5027 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5028 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5029 5030 if (ring->mqd_obj == NULL) { 5031 r = amdgpu_bo_create(adev, 5032 sizeof(struct vi_mqd), 5033 PAGE_SIZE, true, 5034 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, 5035 NULL, &ring->mqd_obj); 5036 if (r) { 5037 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); 5038 return r; 5039 } 5040 } 5041 5042 r = amdgpu_bo_reserve(ring->mqd_obj, false); 5043 if (unlikely(r != 0)) { 5044 gfx_v8_0_cp_compute_fini(adev); 5045 return r; 5046 } 5047 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, 5048 &mqd_gpu_addr); 5049 if (r) { 5050 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); 5051 gfx_v8_0_cp_compute_fini(adev); 5052 return r; 5053 } 5054 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); 5055 if (r) { 5056 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); 5057 gfx_v8_0_cp_compute_fini(adev); 5058 return r; 5059 } 5060 5061 /* init the mqd struct */ 5062 memset(buf, 0, sizeof(struct vi_mqd)); 5063 5064 mqd = (struct vi_mqd *)buf; 5065 mqd->header = 0xC0310800; 5066 mqd->compute_pipelinestat_enable = 0x00000001; 5067 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 5068 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 5069 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 5070 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 5071 mqd->compute_misc_reserved = 0x00000003; 5072 5073 mutex_lock(&adev->srbm_mutex); 5074 vi_srbm_select(adev, ring->me, 5075 ring->pipe, 5076 ring->queue, 0); 5077 5078 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); 5079 eop_gpu_addr >>= 8; 5080 5081 /* write the EOP addr */ 5082 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr); 5083 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); 5084 5085 /* set the VMID assigned */ 5086 WREG32(mmCP_HQD_VMID, 0); 5087 5088 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 5089 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 5090 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 5091 (order_base_2(MEC_HPD_SIZE / 4) - 1)); 5092 WREG32(mmCP_HQD_EOP_CONTROL, tmp); 5093 5094 /* disable wptr polling */ 5095 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); 5096 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); 5097 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); 5098 5099 mqd->cp_hqd_eop_base_addr_lo = 5100 RREG32(mmCP_HQD_EOP_BASE_ADDR); 5101 mqd->cp_hqd_eop_base_addr_hi = 5102 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI); 5103 5104 /* enable doorbell? 
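use_doorbell is hard-coded true above, so this normally just sets
DOORBELL_EN; the result is also cached in
mqd->cp_hqd_pq_doorbell_control so the CP can restore it whenever it
reloads the queue state from the MQD.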
*/ 5105 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 5106 if (use_doorbell) { 5107 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 5108 } else { 5109 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); 5110 } 5111 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); 5112 mqd->cp_hqd_pq_doorbell_control = tmp; 5113 5114 /* disable the queue if it's active */ 5115 mqd->cp_hqd_dequeue_request = 0; 5116 mqd->cp_hqd_pq_rptr = 0; 5117 mqd->cp_hqd_pq_wptr = 0; 5118 if (RREG32(mmCP_HQD_ACTIVE) & 1) { 5119 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); 5120 for (j = 0; j < adev->usec_timeout; j++) { 5121 if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) 5122 break; 5123 udelay(1); 5124 } 5125 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); 5126 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); 5127 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 5128 } 5129 5130 /* set the pointer to the MQD */ 5131 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; 5132 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); 5133 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); 5134 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 5135 5136 /* set MQD vmid to 0 */ 5137 tmp = RREG32(mmCP_MQD_CONTROL); 5138 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 5139 WREG32(mmCP_MQD_CONTROL, tmp); 5140 mqd->cp_mqd_control = tmp; 5141 5142 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */ 5143 hqd_gpu_addr = ring->gpu_addr >> 8; 5144 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 5145 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 5146 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); 5147 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); 5148 5149 /* set up the HQD, this is similar to CP_RB0_CNTL */ 5150 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 5151 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 5152 (order_base_2(ring->ring_size / 4) - 1)); 5153 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 5154 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 5155 #ifdef __BIG_ENDIAN 5156 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 5157 #endif 5158 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 5159 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 5160 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 5161 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 5162 WREG32(mmCP_HQD_PQ_CONTROL, tmp); 5163 mqd->cp_hqd_pq_control = tmp; 5164 5165 /* set the wb address whether it's enabled or not */ 5166 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 5167 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 5168 mqd->cp_hqd_pq_rptr_report_addr_hi = 5169 upper_32_bits(wb_gpu_addr) & 0xffff; 5170 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, 5171 mqd->cp_hqd_pq_rptr_report_addr_lo); 5172 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 5173 mqd->cp_hqd_pq_rptr_report_addr_hi); 5174 5175 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 5176 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 5177 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 5178 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 5179 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); 5180 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 5181 mqd->cp_hqd_pq_wptr_poll_addr_hi); 5182 5183 /* enable the doorbell if requested */ 5184 if (use_doorbell) { 5185 if ((adev->asic_type == CHIP_CARRIZO) || 5186 (adev->asic_type == CHIP_FIJI) || 5187 (adev->asic_type == CHIP_STONEY) ||
5188 (adev->asic_type == CHIP_POLARIS11) || 5189 (adev->asic_type == CHIP_POLARIS10) || 5190 (adev->asic_type == CHIP_POLARIS12)) { 5191 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, 5192 AMDGPU_DOORBELL_KIQ << 2); 5193 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, 5194 AMDGPU_DOORBELL_MEC_RING7 << 2); 5195 } 5196 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 5197 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 5198 DOORBELL_OFFSET, ring->doorbell_index); 5199 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 5200 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); 5201 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); 5202 mqd->cp_hqd_pq_doorbell_control = tmp; 5203 5204 } else { 5205 mqd->cp_hqd_pq_doorbell_control = 0; 5206 } 5207 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 5208 mqd->cp_hqd_pq_doorbell_control); 5209 5210 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 5211 ring->wptr = 0; 5212 mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); 5213 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 5214 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 5215 5216 /* set the vmid for the queue */ 5217 mqd->cp_hqd_vmid = 0; 5218 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); 5219 5220 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 5221 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 5222 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); 5223 mqd->cp_hqd_persistent_state = tmp; 5224 if (adev->asic_type == CHIP_STONEY || 5225 adev->asic_type == CHIP_POLARIS11 || 5226 adev->asic_type == CHIP_POLARIS10 || 5227 adev->asic_type == CHIP_POLARIS12) { 5228 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); 5229 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); 5230 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); 5231 } 5232 5233 /* activate the queue */ 5234 mqd->cp_hqd_active = 1; 5235 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); 5236 5237 vi_srbm_select(adev, 0, 0, 0, 0); 5238 mutex_unlock(&adev->srbm_mutex); 5239 5240 amdgpu_bo_kunmap(ring->mqd_obj); 5241 amdgpu_bo_unreserve(ring->mqd_obj); 5242 } 5243 5244 if (use_doorbell) { 5245 tmp = RREG32(mmCP_PQ_STATUS); 5246 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 5247 WREG32(mmCP_PQ_STATUS, tmp); 5248 } 5249 5250 gfx_v8_0_cp_compute_enable(adev, true); 5251 5252 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5253 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5254 5255 ring->ready = true; 5256 r = amdgpu_ring_test_ring(ring); 5257 if (r) 5258 ring->ready = false; 5259 } 5260 5261 return 0; 5262 } 5263 5264 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 5265 { 5266 int r; 5267 5268 if (!(adev->flags & AMD_IS_APU)) 5269 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5270 5271 if (!adev->pp_enabled) { 5272 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) { 5273 /* legacy firmware loading */ 5274 r = gfx_v8_0_cp_gfx_load_microcode(adev); 5275 if (r) 5276 return r; 5277 5278 r = gfx_v8_0_cp_compute_load_microcode(adev); 5279 if (r) 5280 return r; 5281 } else { 5282 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5283 AMDGPU_UCODE_ID_CP_CE); 5284 if (r) 5285 return -EINVAL; 5286 5287 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5288 AMDGPU_UCODE_ID_CP_PFP); 5289 if (r) 5290 return -EINVAL; 5291 5292 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5293 AMDGPU_UCODE_ID_CP_ME); 5294 if (r) 5295 return -EINVAL; 5296 5297 if (adev->asic_type == CHIP_TOPAZ) { 5298 r = gfx_v8_0_cp_compute_load_microcode(adev); 5299 if (r) 5300 return r; 
5301 } else { 5302 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5303 AMDGPU_UCODE_ID_CP_MEC1); 5304 if (r) 5305 return -EINVAL; 5306 } 5307 } 5308 } 5309 5310 r = gfx_v8_0_cp_gfx_resume(adev); 5311 if (r) 5312 return r; 5313 5314 if (amdgpu_sriov_vf(adev)) 5315 r = gfx_v8_0_kiq_resume(adev); 5316 else 5317 r = gfx_v8_0_cp_compute_resume(adev); 5318 if (r) 5319 return r; 5320 5321 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5322 5323 return 0; 5324 } 5325 5326 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 5327 { 5328 gfx_v8_0_cp_gfx_enable(adev, enable); 5329 gfx_v8_0_cp_compute_enable(adev, enable); 5330 } 5331 5332 static int gfx_v8_0_hw_init(void *handle) 5333 { 5334 int r; 5335 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5336 5337 gfx_v8_0_init_golden_registers(adev); 5338 gfx_v8_0_gpu_init(adev); 5339 5340 r = gfx_v8_0_rlc_resume(adev); 5341 if (r) 5342 return r; 5343 5344 r = gfx_v8_0_cp_resume(adev); 5345 5346 return r; 5347 } 5348 5349 static int gfx_v8_0_hw_fini(void *handle) 5350 { 5351 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5352 5353 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5354 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5355 if (amdgpu_sriov_vf(adev)) { 5356 pr_debug("For SRIOV client, shouldn't do anything.\n"); 5357 return 0; 5358 } 5359 gfx_v8_0_cp_enable(adev, false); 5360 gfx_v8_0_rlc_stop(adev); 5361 gfx_v8_0_cp_compute_fini(adev); 5362 5363 amdgpu_set_powergating_state(adev, 5364 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); 5365 5366 return 0; 5367 } 5368 5369 static int gfx_v8_0_suspend(void *handle) 5370 { 5371 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5372 5373 return gfx_v8_0_hw_fini(adev); 5374 } 5375 5376 static int gfx_v8_0_resume(void *handle) 5377 { 5378 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5379 5380 return gfx_v8_0_hw_init(adev); 5381 } 5382 5383 static bool gfx_v8_0_is_idle(void *handle) 5384 { 5385 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5386 5387 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5388 return false; 5389 else 5390 return true; 5391 } 5392 5393 static int gfx_v8_0_wait_for_idle(void *handle) 5394 { 5395 unsigned i; 5396 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5397 5398 for (i = 0; i < adev->usec_timeout; i++) { 5399 if (gfx_v8_0_is_idle(handle)) 5400 return 0; 5401 5402 udelay(1); 5403 } 5404 return -ETIMEDOUT; 5405 } 5406 5407 static bool gfx_v8_0_check_soft_reset(void *handle) 5408 { 5409 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5410 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5411 u32 tmp; 5412 5413 /* GRBM_STATUS */ 5414 tmp = RREG32(mmGRBM_STATUS); 5415 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5416 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5417 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5418 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5419 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5420 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5421 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 5422 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5423 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 5424 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5425 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 5426 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5427 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5428 } 5429 5430 /* GRBM_STATUS2 */ 5431 
tmp = RREG32(mmGRBM_STATUS2); 5432 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 5433 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5434 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 5435 5436 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 5437 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 5438 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 5439 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5440 SOFT_RESET_CPF, 1); 5441 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5442 SOFT_RESET_CPC, 1); 5443 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5444 SOFT_RESET_CPG, 1); 5445 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 5446 SOFT_RESET_GRBM, 1); 5447 } 5448 5449 /* SRBM_STATUS */ 5450 tmp = RREG32(mmSRBM_STATUS); 5451 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 5452 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5453 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5454 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 5455 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5456 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5457 5458 if (grbm_soft_reset || srbm_soft_reset) { 5459 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5460 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5461 return true; 5462 } else { 5463 adev->gfx.grbm_soft_reset = 0; 5464 adev->gfx.srbm_soft_reset = 0; 5465 return false; 5466 } 5467 } 5468 5469 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev, 5470 struct amdgpu_ring *ring) 5471 { 5472 int i; 5473 5474 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5475 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 5476 u32 tmp; 5477 tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); 5478 tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST, 5479 DEQUEUE_REQ, 2); 5480 WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp); 5481 for (i = 0; i < adev->usec_timeout; i++) { 5482 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 5483 break; 5484 udelay(1); 5485 } 5486 } 5487 } 5488 5489 static int gfx_v8_0_pre_soft_reset(void *handle) 5490 { 5491 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5492 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5493 5494 if ((!adev->gfx.grbm_soft_reset) && 5495 (!adev->gfx.srbm_soft_reset)) 5496 return 0; 5497 5498 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5499 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5500 5501 /* stop the rlc */ 5502 gfx_v8_0_rlc_stop(adev); 5503 5504 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5505 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5506 /* Disable GFX parsing/prefetching */ 5507 gfx_v8_0_cp_gfx_enable(adev, false); 5508 5509 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5510 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5511 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5512 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5513 int i; 5514 5515 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5516 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5517 5518 gfx_v8_0_inactive_hqd(adev, ring); 5519 } 5520 /* Disable MEC parsing/prefetching */ 5521 gfx_v8_0_cp_compute_enable(adev, false); 5522 } 5523 5524 return 0; 5525 } 5526 5527 static int gfx_v8_0_soft_reset(void *handle) 5528 { 5529 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5530 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5531 u32 tmp; 5532 5533 if ((!adev->gfx.grbm_soft_reset) && 5534 (!adev->gfx.srbm_soft_reset)) 5535 
return 0; 5536 5537 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5538 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5539 5540 if (grbm_soft_reset || srbm_soft_reset) { 5541 tmp = RREG32(mmGMCON_DEBUG); 5542 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5543 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5544 WREG32(mmGMCON_DEBUG, tmp); 5545 udelay(50); 5546 } 5547 5548 if (grbm_soft_reset) { 5549 tmp = RREG32(mmGRBM_SOFT_RESET); 5550 tmp |= grbm_soft_reset; 5551 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5552 WREG32(mmGRBM_SOFT_RESET, tmp); 5553 tmp = RREG32(mmGRBM_SOFT_RESET); 5554 5555 udelay(50); 5556 5557 tmp &= ~grbm_soft_reset; 5558 WREG32(mmGRBM_SOFT_RESET, tmp); 5559 tmp = RREG32(mmGRBM_SOFT_RESET); 5560 } 5561 5562 if (srbm_soft_reset) { 5563 tmp = RREG32(mmSRBM_SOFT_RESET); 5564 tmp |= srbm_soft_reset; 5565 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5566 WREG32(mmSRBM_SOFT_RESET, tmp); 5567 tmp = RREG32(mmSRBM_SOFT_RESET); 5568 5569 udelay(50); 5570 5571 tmp &= ~srbm_soft_reset; 5572 WREG32(mmSRBM_SOFT_RESET, tmp); 5573 tmp = RREG32(mmSRBM_SOFT_RESET); 5574 } 5575 5576 if (grbm_soft_reset || srbm_soft_reset) { 5577 tmp = RREG32(mmGMCON_DEBUG); 5578 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5579 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5580 WREG32(mmGMCON_DEBUG, tmp); 5581 } 5582 5583 /* Wait a little for things to settle down */ 5584 udelay(50); 5585 5586 return 0; 5587 } 5588 5589 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev, 5590 struct amdgpu_ring *ring) 5591 { 5592 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5593 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 5594 WREG32(mmCP_HQD_PQ_RPTR, 0); 5595 WREG32(mmCP_HQD_PQ_WPTR, 0); 5596 vi_srbm_select(adev, 0, 0, 0, 0); 5597 } 5598 5599 static int gfx_v8_0_post_soft_reset(void *handle) 5600 { 5601 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5602 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5603 5604 if ((!adev->gfx.grbm_soft_reset) && 5605 (!adev->gfx.srbm_soft_reset)) 5606 return 0; 5607 5608 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5609 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5610 5611 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5612 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5613 gfx_v8_0_cp_gfx_resume(adev); 5614 5615 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5616 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5617 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5618 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5619 int i; 5620 5621 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5622 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5623 5624 gfx_v8_0_init_hqd(adev, ring); 5625 } 5626 gfx_v8_0_cp_compute_resume(adev); 5627 } 5628 gfx_v8_0_rlc_start(adev); 5629 5630 return 0; 5631 } 5632 5633 /** 5634 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5635 * 5636 * @adev: amdgpu_device pointer 5637 * 5638 * Fetches a GPU clock counter snapshot. 5639 * Returns the 64 bit clock counter snapshot. 
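* Writing 1 to RLC_CAPTURE_GPU_CLOCK_COUNT latches the free-running
* counter so the LSB/MSB halves can be read as a coherent pair;
* gpu_clock_mutex serializes concurrent readers against the latch.
* Usage sketch (hypothetical caller):
*   u64 t0 = gfx_v8_0_get_gpu_clock_counter(adev);
*   ...timed work...
*   u64 cycles = gfx_v8_0_get_gpu_clock_counter(adev) - t0;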
5640 */ 5641 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5642 { 5643 uint64_t clock; 5644 5645 mutex_lock(&adev->gfx.gpu_clock_mutex); 5646 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5647 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5648 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5649 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5650 return clock; 5651 } 5652 5653 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5654 uint32_t vmid, 5655 uint32_t gds_base, uint32_t gds_size, 5656 uint32_t gws_base, uint32_t gws_size, 5657 uint32_t oa_base, uint32_t oa_size) 5658 { 5659 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 5660 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 5661 5662 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 5663 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 5664 5665 oa_base = oa_base >> AMDGPU_OA_SHIFT; 5666 oa_size = oa_size >> AMDGPU_OA_SHIFT; 5667 5668 /* GDS Base */ 5669 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5670 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5671 WRITE_DATA_DST_SEL(0))); 5672 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5673 amdgpu_ring_write(ring, 0); 5674 amdgpu_ring_write(ring, gds_base); 5675 5676 /* GDS Size */ 5677 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5678 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5679 WRITE_DATA_DST_SEL(0))); 5680 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5681 amdgpu_ring_write(ring, 0); 5682 amdgpu_ring_write(ring, gds_size); 5683 5684 /* GWS */ 5685 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5686 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5687 WRITE_DATA_DST_SEL(0))); 5688 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5689 amdgpu_ring_write(ring, 0); 5690 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5691 5692 /* OA */ 5693 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5694 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5695 WRITE_DATA_DST_SEL(0))); 5696 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5697 amdgpu_ring_write(ring, 0); 5698 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5699 } 5700 5701 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5702 { 5703 WREG32(mmSQ_IND_INDEX, 5704 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5705 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5706 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5707 (SQ_IND_INDEX__FORCE_READ_MASK)); 5708 return RREG32(mmSQ_IND_DATA); 5709 } 5710 5711 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5712 uint32_t wave, uint32_t thread, 5713 uint32_t regno, uint32_t num, uint32_t *out) 5714 { 5715 WREG32(mmSQ_IND_INDEX, 5716 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5717 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5718 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5719 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5720 (SQ_IND_INDEX__FORCE_READ_MASK) | 5721 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5722 while (num--) 5723 *(out++) = RREG32(mmSQ_IND_DATA); 5724 } 5725 5726 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5727 { 5728 /* type 0 wave data */ 5729 dst[(*no_fields)++] = 0; 5730 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5731 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5732 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, 
ixSQ_WAVE_PC_HI); 5733 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5734 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5735 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5736 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5737 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5738 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5739 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5740 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5741 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5742 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5743 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5744 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5745 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5746 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5747 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5748 } 5749 5750 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5751 uint32_t wave, uint32_t start, 5752 uint32_t size, uint32_t *dst) 5753 { 5754 wave_read_regs( 5755 adev, simd, wave, 0, 5756 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5757 } 5758 5759 5760 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5761 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5762 .select_se_sh = &gfx_v8_0_select_se_sh, 5763 .read_wave_data = &gfx_v8_0_read_wave_data, 5764 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5765 }; 5766 5767 static int gfx_v8_0_early_init(void *handle) 5768 { 5769 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5770 5771 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5772 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS; 5773 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5774 gfx_v8_0_set_ring_funcs(adev); 5775 gfx_v8_0_set_irq_funcs(adev); 5776 gfx_v8_0_set_gds_init(adev); 5777 gfx_v8_0_set_rlc_funcs(adev); 5778 5779 return 0; 5780 } 5781 5782 static int gfx_v8_0_late_init(void *handle) 5783 { 5784 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5785 int r; 5786 5787 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5788 if (r) 5789 return r; 5790 5791 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5792 if (r) 5793 return r; 5794 5795 /* requires IBs so do in late init after IB pool is initialized */ 5796 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5797 if (r) 5798 return r; 5799 5800 amdgpu_set_powergating_state(adev, 5801 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); 5802 5803 return 0; 5804 } 5805 5806 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5807 bool enable) 5808 { 5809 if ((adev->asic_type == CHIP_POLARIS11) || 5810 (adev->asic_type == CHIP_POLARIS12)) 5811 /* Send msg to SMU via Powerplay */ 5812 amdgpu_set_powergating_state(adev, 5813 AMD_IP_BLOCK_TYPE_SMC, 5814 enable ? 5815 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); 5816 5817 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5818 } 5819 5820 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5821 bool enable) 5822 { 5823 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 
1 : 0); 5824 } 5825 5826 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5827 bool enable) 5828 { 5829 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5830 } 5831 5832 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5833 bool enable) 5834 { 5835 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5836 } 5837 5838 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5839 bool enable) 5840 { 5841 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5842 5843 /* Read any GFX register to wake up GFX. */ 5844 if (!enable) 5845 RREG32(mmDB_RENDER_CONTROL); 5846 } 5847 5848 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5849 bool enable) 5850 { 5851 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5852 cz_enable_gfx_cg_power_gating(adev, true); 5853 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5854 cz_enable_gfx_pipeline_power_gating(adev, true); 5855 } else { 5856 cz_enable_gfx_cg_power_gating(adev, false); 5857 cz_enable_gfx_pipeline_power_gating(adev, false); 5858 } 5859 } 5860 5861 static int gfx_v8_0_set_powergating_state(void *handle, 5862 enum amd_powergating_state state) 5863 { 5864 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5865 bool enable = (state == AMD_PG_STATE_GATE); 5866 5867 if (amdgpu_sriov_vf(adev)) 5868 return 0; 5869 5870 switch (adev->asic_type) { 5871 case CHIP_CARRIZO: 5872 case CHIP_STONEY: 5873 5874 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5875 cz_enable_sck_slow_down_on_power_up(adev, true); 5876 cz_enable_sck_slow_down_on_power_down(adev, true); 5877 } else { 5878 cz_enable_sck_slow_down_on_power_up(adev, false); 5879 cz_enable_sck_slow_down_on_power_down(adev, false); 5880 } 5881 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5882 cz_enable_cp_power_gating(adev, true); 5883 else 5884 cz_enable_cp_power_gating(adev, false); 5885 5886 cz_update_gfx_cg_power_gating(adev, enable); 5887 5888 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5889 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5890 else 5891 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5892 5893 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5894 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5895 else 5896 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5897 break; 5898 case CHIP_POLARIS11: 5899 case CHIP_POLARIS12: 5900 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5901 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5902 else 5903 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5904 5905 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5906 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5907 else 5908 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5909 5910 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) 5911 polaris11_enable_gfx_quick_mg_power_gating(adev, true); 5912 else 5913 polaris11_enable_gfx_quick_mg_power_gating(adev, false); 5914 break; 5915 default: 5916 break; 5917 } 5918 5919 return 0; 5920 } 5921 5922 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) 5923 { 5924 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5925 int data; 5926 5927 if (amdgpu_sriov_vf(adev)) 5928 *flags = 0; 5929 5930 /* AMD_CG_SUPPORT_GFX_MGCG */ 5931 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5932 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) 5933 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5934 
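/*
 * The AMD_CG_SUPPORT_* flags reported by this function are derived from
 * live register state rather than from the cached adev->cg_flags; e.g.
 * CGCG is reported only when its enable bit actually reads back set:
 *   RREG32(mmRLC_CGCG_CGLS_CTRL) & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK
 */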
5935 /* AMD_CG_SUPPORT_GFX_CGCG */ 5936 data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5937 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5938 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5939 5940 /* AMD_CG_SUPPORT_GFX_CGLS */ 5941 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5942 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5943 5944 /* AMD_CG_SUPPORT_GFX_CGTS */ 5945 data = RREG32(mmCGTS_SM_CTRL_REG); 5946 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK)) 5947 *flags |= AMD_CG_SUPPORT_GFX_CGTS; 5948 5949 /* AMD_CG_SUPPORT_GFX_CGTS_LS */ 5950 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK)) 5951 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS; 5952 5953 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5954 data = RREG32(mmRLC_MEM_SLP_CNTL); 5955 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5956 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5957 5958 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5959 data = RREG32(mmCP_MEM_SLP_CNTL); 5960 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5961 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5962 } 5963 5964 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5965 uint32_t reg_addr, uint32_t cmd) 5966 { 5967 uint32_t data; 5968 5969 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5970 5971 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5972 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5973 5974 data = RREG32(mmRLC_SERDES_WR_CTRL); 5975 if (adev->asic_type == CHIP_STONEY) 5976 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5977 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5978 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5979 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5980 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5981 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5982 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5983 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5984 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5985 else 5986 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5987 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5988 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5989 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5990 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5991 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5992 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5993 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5994 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5995 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5996 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5997 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5998 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5999 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 6000 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 6001 6002 WREG32(mmRLC_SERDES_WR_CTRL, data); 6003 } 6004 6005 #define MSG_ENTER_RLC_SAFE_MODE 1 6006 #define MSG_EXIT_RLC_SAFE_MODE 0 6007 #define RLC_GPR_REG2__REQ_MASK 0x00000001 6008 #define RLC_GPR_REG2__REQ__SHIFT 0 6009 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 6010 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 6011 6012 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 6013 { 6014 u32 data; 6015 unsigned i; 6016 6017 data = RREG32(mmRLC_CNTL); 6018 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 6019 return; 6020 6021 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 6022 data |= RLC_SAFE_MODE__CMD_MASK; 6023 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 6024 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 6025 WREG32(mmRLC_SAFE_MODE, data); 6026 6027 for (i = 0; i < adev->usec_timeout; i++) { 6028 if ((RREG32(mmRLC_GPM_STAT) & 6029 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 6030 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 6031
(RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 6032 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 6033 break; 6034 udelay(1); 6035 } 6036 6037 for (i = 0; i < adev->usec_timeout; i++) { 6038 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 6039 break; 6040 udelay(1); 6041 } 6042 adev->gfx.rlc.in_safe_mode = true; 6043 } 6044 } 6045 6046 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 6047 { 6048 u32 data = 0; 6049 unsigned i; 6050 6051 data = RREG32(mmRLC_CNTL); 6052 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 6053 return; 6054 6055 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 6056 if (adev->gfx.rlc.in_safe_mode) { 6057 data |= RLC_SAFE_MODE__CMD_MASK; 6058 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 6059 WREG32(mmRLC_SAFE_MODE, data); 6060 adev->gfx.rlc.in_safe_mode = false; 6061 } 6062 } 6063 6064 for (i = 0; i < adev->usec_timeout; i++) { 6065 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 6066 break; 6067 udelay(1); 6068 } 6069 } 6070 6071 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 6072 .enter_safe_mode = iceland_enter_rlc_safe_mode, 6073 .exit_safe_mode = iceland_exit_rlc_safe_mode 6074 }; 6075 6076 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 6077 bool enable) 6078 { 6079 uint32_t temp, data; 6080 6081 adev->gfx.rlc.funcs->enter_safe_mode(adev); 6082 6083 /* It is disabled by HW by default */ 6084 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 6085 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6086 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 6087 /* 1 - RLC memory Light sleep */ 6088 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 6089 6090 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 6091 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 6092 } 6093 6094 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 6095 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6096 if (adev->flags & AMD_IS_APU) 6097 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 6098 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 6099 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 6100 else 6101 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 6102 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 6103 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 6104 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 6105 6106 if (temp != data) 6107 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 6108 6109 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6110 gfx_v8_0_wait_for_rlc_serdes(adev); 6111 6112 /* 5 - clear mgcg override */ 6113 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 6114 6115 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 6116 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 6117 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 6118 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 6119 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 6120 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 6121 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 6122 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 6123 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 6124 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 6125 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 6126 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 6127 if (temp != data) 6128 WREG32(mmCGTS_SM_CTRL_REG, data); 6129 } 6130 udelay(50); 6131 6132 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6133 gfx_v8_0_wait_for_rlc_serdes(adev); 6134 } else { 6135 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 6136 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 
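/*
 * Disable path: setting these override bits defeats MGCG and forces the
 * clocks on; the enable path above clears the same bits. The temp/data
 * pair is the usual read-modify-write-if-changed idiom, so the register
 * is only written when the value actually changes.
 */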
6137 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 6138 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 6139 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 6140 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 6141 if (temp != data) 6142 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 6143 6144 /* 2 - disable MGLS in RLC */ 6145 data = RREG32(mmRLC_MEM_SLP_CNTL); 6146 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 6147 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 6148 WREG32(mmRLC_MEM_SLP_CNTL, data); 6149 } 6150 6151 /* 3 - disable MGLS in CP */ 6152 data = RREG32(mmCP_MEM_SLP_CNTL); 6153 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 6154 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 6155 WREG32(mmCP_MEM_SLP_CNTL, data); 6156 } 6157 6158 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 6159 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 6160 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 6161 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 6162 if (temp != data) 6163 WREG32(mmCGTS_SM_CTRL_REG, data); 6164 6165 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6166 gfx_v8_0_wait_for_rlc_serdes(adev); 6167 6168 /* 6 - set mgcg override */ 6169 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 6170 6171 udelay(50); 6172 6173 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6174 gfx_v8_0_wait_for_rlc_serdes(adev); 6175 } 6176 6177 adev->gfx.rlc.funcs->exit_safe_mode(adev); 6178 } 6179 6180 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 6181 bool enable) 6182 { 6183 uint32_t temp, temp1, data, data1; 6184 6185 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 6186 6187 adev->gfx.rlc.funcs->enter_safe_mode(adev); 6188 6189 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 6190 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6191 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 6192 if (temp1 != data1) 6193 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6194 6195 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6196 gfx_v8_0_wait_for_rlc_serdes(adev); 6197 6198 /* 2 - clear cgcg override */ 6199 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 6200 6201 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6202 gfx_v8_0_wait_for_rlc_serdes(adev); 6203 6204 /* 3 - write cmd to set CGLS */ 6205 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 6206 6207 /* 4 - enable cgcg */ 6208 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 6209 6210 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6211 /* enable cgls*/ 6212 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 6213 6214 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6215 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 6216 6217 if (temp1 != data1) 6218 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6219 } else { 6220 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 6221 } 6222 6223 if (temp != data) 6224 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 6225 6226 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/ 6227 * Cmp_busy/GFX_Idle interrupts 6228 */ 6229 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 6230 } else { 6231 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 6232 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 6233 6234 /* TEST CGCG */ 6235 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6236 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 6237 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 6238 if (temp1 != data1) 6239 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6240 6241 /* read gfx register to wake up cgcg */ 6242 RREG32(mmCB_CGTT_SCLK_CTRL); 6243 
RREG32(mmCB_CGTT_SCLK_CTRL); 6244 RREG32(mmCB_CGTT_SCLK_CTRL); 6245 RREG32(mmCB_CGTT_SCLK_CTRL); 6246 6247 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6248 gfx_v8_0_wait_for_rlc_serdes(adev); 6249 6250 /* write cmd to Set CGCG Override */ 6251 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 6252 6253 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6254 gfx_v8_0_wait_for_rlc_serdes(adev); 6255 6256 /* write cmd to Clear CGLS */ 6257 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 6258 6259 /* disable cgcg, cgls should be disabled too. */ 6260 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 6261 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 6262 if (temp != data) 6263 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 6264 } 6265 6266 gfx_v8_0_wait_for_rlc_serdes(adev); 6267 6268 adev->gfx.rlc.funcs->exit_safe_mode(adev); 6269 } 6270 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 6271 bool enable) 6272 { 6273 if (enable) { 6274 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 6275 * === MGCG + MGLS + TS(CG/LS) === 6276 */ 6277 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6278 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6279 } else { 6280 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 6281 * === CGCG + CGLS === 6282 */ 6283 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6284 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6285 } 6286 return 0; 6287 } 6288 6289 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 6290 enum amd_clockgating_state state) 6291 { 6292 uint32_t msg_id, pp_state = 0; 6293 uint32_t pp_support_state = 0; 6294 void *pp_handle = adev->powerplay.pp_handle; 6295 6296 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6297 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6298 pp_support_state = PP_STATE_SUPPORT_LS; 6299 pp_state = PP_STATE_LS; 6300 } 6301 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6302 pp_support_state |= PP_STATE_SUPPORT_CG; 6303 pp_state |= PP_STATE_CG; 6304 } 6305 if (state == AMD_CG_STATE_UNGATE) 6306 pp_state = 0; 6307 6308 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6309 PP_BLOCK_GFX_CG, 6310 pp_support_state, 6311 pp_state); 6312 amd_set_clockgating_by_smu(pp_handle, msg_id); 6313 } 6314 6315 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6316 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6317 pp_support_state = PP_STATE_SUPPORT_LS; 6318 pp_state = PP_STATE_LS; 6319 } 6320 6321 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6322 pp_support_state |= PP_STATE_SUPPORT_CG; 6323 pp_state |= PP_STATE_CG; 6324 } 6325 6326 if (state == AMD_CG_STATE_UNGATE) 6327 pp_state = 0; 6328 6329 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6330 PP_BLOCK_GFX_MG, 6331 pp_support_state, 6332 pp_state); 6333 amd_set_clockgating_by_smu(pp_handle, msg_id); 6334 } 6335 6336 return 0; 6337 } 6338 6339 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 6340 enum amd_clockgating_state state) 6341 { 6342 6343 uint32_t msg_id, pp_state = 0; 6344 uint32_t pp_support_state = 0; 6345 void *pp_handle = adev->powerplay.pp_handle; 6346 6347 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6348 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6349 pp_support_state = PP_STATE_SUPPORT_LS; 6350 pp_state = PP_STATE_LS; 6351 } 6352 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6353 pp_support_state |=
PP_STATE_SUPPORT_CG; 6354 pp_state |= PP_STATE_CG; 6355 } 6356 if (state == AMD_CG_STATE_UNGATE) 6357 pp_state = 0; 6358 6359 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6360 PP_BLOCK_GFX_CG, 6361 pp_support_state, 6362 pp_state); 6363 amd_set_clockgating_by_smu(pp_handle, msg_id); 6364 } 6365 6366 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 6367 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 6368 pp_support_state = PP_STATE_SUPPORT_LS; 6369 pp_state = PP_STATE_LS; 6370 } 6371 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 6372 pp_support_state |= PP_STATE_SUPPORT_CG; 6373 pp_state |= PP_STATE_CG; 6374 } 6375 if (state == AMD_CG_STATE_UNGATE) 6376 pp_state = 0; 6377 6378 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6379 PP_BLOCK_GFX_3D, 6380 pp_support_state, 6381 pp_state); 6382 amd_set_clockgating_by_smu(pp_handle, msg_id); 6383 } 6384 6385 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6386 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6387 pp_support_state = PP_STATE_SUPPORT_LS; 6388 pp_state = PP_STATE_LS; 6389 } 6390 6391 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6392 pp_support_state |= PP_STATE_SUPPORT_CG; 6393 pp_state |= PP_STATE_CG; 6394 } 6395 6396 if (state == AMD_CG_STATE_UNGATE) 6397 pp_state = 0; 6398 6399 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6400 PP_BLOCK_GFX_MG, 6401 pp_support_state, 6402 pp_state); 6403 amd_set_clockgating_by_smu(pp_handle, msg_id); 6404 } 6405 6406 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 6407 pp_support_state = PP_STATE_SUPPORT_LS; 6408 6409 if (state == AMD_CG_STATE_UNGATE) 6410 pp_state = 0; 6411 else 6412 pp_state = PP_STATE_LS; 6413 6414 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6415 PP_BLOCK_GFX_RLC, 6416 pp_support_state, 6417 pp_state); 6418 amd_set_clockgating_by_smu(pp_handle, msg_id); 6419 } 6420 6421 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 6422 pp_support_state = PP_STATE_SUPPORT_LS; 6423 6424 if (state == AMD_CG_STATE_UNGATE) 6425 pp_state = 0; 6426 else 6427 pp_state = PP_STATE_LS; 6428 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6429 PP_BLOCK_GFX_CP, 6430 pp_support_state, 6431 pp_state); 6432 amd_set_clockgating_by_smu(pp_handle, msg_id); 6433 } 6434 6435 return 0; 6436 } 6437 6438 static int gfx_v8_0_set_clockgating_state(void *handle, 6439 enum amd_clockgating_state state) 6440 { 6441 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6442 6443 if (amdgpu_sriov_vf(adev)) 6444 return 0; 6445 6446 switch (adev->asic_type) { 6447 case CHIP_FIJI: 6448 case CHIP_CARRIZO: 6449 case CHIP_STONEY: 6450 gfx_v8_0_update_gfx_clock_gating(adev, 6451 state == AMD_CG_STATE_GATE); 6452 break; 6453 case CHIP_TONGA: 6454 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6455 break; 6456 case CHIP_POLARIS10: 6457 case CHIP_POLARIS11: 6458 case CHIP_POLARIS12: 6459 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6460 break; 6461 default: 6462 break; 6463 } 6464 return 0; 6465 } 6466 6467 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6468 { 6469 return ring->adev->wb.wb[ring->rptr_offs]; 6470 } 6471 6472 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6473 { 6474 struct amdgpu_device *adev = ring->adev; 6475 6476 if (ring->use_doorbell) 6477 /* XXX check if swapping is necessary on BE */ 6478 return ring->adev->wb.wb[ring->wptr_offs]; 6479 else 6480 return RREG32(mmCP_RB0_WPTR); 6481 } 6482 6483 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 6484 { 6485 struct amdgpu_device *adev = ring->adev; 6486 6487 if 
(ring->use_doorbell) { 6488 /* XXX check if swapping is necessary on BE */ 6489 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6490 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6491 } else { 6492 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 6493 (void)RREG32(mmCP_RB0_WPTR); 6494 } 6495 } 6496 6497 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 6498 { 6499 u32 ref_and_mask, reg_mem_engine; 6500 6501 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || 6502 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { 6503 switch (ring->me) { 6504 case 1: 6505 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6506 break; 6507 case 2: 6508 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6509 break; 6510 default: 6511 return; 6512 } 6513 reg_mem_engine = 0; 6514 } else { 6515 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6516 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6517 } 6518 6519 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6520 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6521 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6522 reg_mem_engine)); 6523 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 6524 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6525 amdgpu_ring_write(ring, ref_and_mask); 6526 amdgpu_ring_write(ring, ref_and_mask); 6527 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6528 } 6529 6530 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) 6531 { 6532 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6533 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) | 6534 EVENT_INDEX(4)); 6535 6536 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6537 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) | 6538 EVENT_INDEX(0)); 6539 } 6540 6541 6542 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) 6543 { 6544 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6545 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6546 WRITE_DATA_DST_SEL(0) | 6547 WR_CONFIRM)); 6548 amdgpu_ring_write(ring, mmHDP_DEBUG0); 6549 amdgpu_ring_write(ring, 0); 6550 amdgpu_ring_write(ring, 1); 6551 6552 } 6553 6554 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6555 struct amdgpu_ib *ib, 6556 unsigned vm_id, bool ctx_switch) 6557 { 6558 u32 header, control = 0; 6559 6560 if (ib->flags & AMDGPU_IB_FLAG_CE) 6561 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 6562 else 6563 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 6564 6565 control |= ib->length_dw | (vm_id << 24); 6566 6567 if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT) 6568 control |= INDIRECT_BUFFER_PRE_ENB(1); 6569 6570 amdgpu_ring_write(ring, header); 6571 amdgpu_ring_write(ring, 6572 #ifdef __BIG_ENDIAN 6573 (2 << 0) | 6574 #endif 6575 (ib->gpu_addr & 0xFFFFFFFC)); 6576 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6577 amdgpu_ring_write(ring, control); 6578 } 6579 6580 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 6581 struct amdgpu_ib *ib, 6582 unsigned vm_id, bool ctx_switch) 6583 { 6584 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); 6585 6586 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 6587 amdgpu_ring_write(ring, 6588 #ifdef __BIG_ENDIAN 6589 (2 << 0) | 6590 #endif 6591 (ib->gpu_addr & 0xFFFFFFFC)); 6592 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6593 amdgpu_ring_write(ring, control); 6594 } 6595 6596 static void 
gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, 6597 u64 seq, unsigned flags) 6598 { 6599 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6600 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6601 6602 /* EVENT_WRITE_EOP - flush caches, send int */ 6603 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 6604 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6605 EOP_TC_ACTION_EN | 6606 EOP_TC_WB_ACTION_EN | 6607 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6608 EVENT_INDEX(5))); 6609 amdgpu_ring_write(ring, addr & 0xfffffffc); 6610 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 6611 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6612 amdgpu_ring_write(ring, lower_32_bits(seq)); 6613 amdgpu_ring_write(ring, upper_32_bits(seq)); 6614 6615 } 6616 6617 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 6618 { 6619 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6620 uint32_t seq = ring->fence_drv.sync_seq; 6621 uint64_t addr = ring->fence_drv.gpu_addr; 6622 6623 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6624 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ 6625 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 6626 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ 6627 amdgpu_ring_write(ring, addr & 0xfffffffc); 6628 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 6629 amdgpu_ring_write(ring, seq); 6630 amdgpu_ring_write(ring, 0xffffffff); 6631 amdgpu_ring_write(ring, 4); /* poll interval */ 6632 } 6633 6634 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 6635 unsigned vm_id, uint64_t pd_addr) 6636 { 6637 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6638 6639 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6640 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 6641 WRITE_DATA_DST_SEL(0)) | 6642 WR_CONFIRM); 6643 if (vm_id < 8) { 6644 amdgpu_ring_write(ring, 6645 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); 6646 } else { 6647 amdgpu_ring_write(ring, 6648 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); 6649 } 6650 amdgpu_ring_write(ring, 0); 6651 amdgpu_ring_write(ring, pd_addr >> 12); 6652 6653 /* bits 0-15 are the VM contexts 0-15 */ 6654 /* invalidate the cache */ 6655 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6656 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6657 WRITE_DATA_DST_SEL(0))); 6658 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 6659 amdgpu_ring_write(ring, 0); 6660 amdgpu_ring_write(ring, 1 << vm_id); 6661 6662 /* wait for the invalidate to complete */ 6663 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6664 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ 6665 WAIT_REG_MEM_FUNCTION(0) | /* always */ 6666 WAIT_REG_MEM_ENGINE(0))); /* me */ 6667 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 6668 amdgpu_ring_write(ring, 0); 6669 amdgpu_ring_write(ring, 0); /* ref */ 6670 amdgpu_ring_write(ring, 0); /* mask */ 6671 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6672 6673 /* compute doesn't have PFP */ 6674 if (usepfp) { 6675 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 6676 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 6677 amdgpu_ring_write(ring, 0x0); 6678 } 6679 } 6680 6681 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 6682 { 6683 return ring->adev->wb.wb[ring->wptr_offs]; 6684 } 6685 6686 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 6687 { 6688 struct amdgpu_device *adev = ring->adev; 6689 6690
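/* Compute and KIQ queues always use a doorbell on VI: the new write pointer is mirrored into the writeback buffer for the CP to fetch, then the doorbell is rung. Unlike the gfx path above, there is no MMIO fallback. */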
/* XXX check if swapping is necessary on BE */ 6691 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6692 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6693 } 6694 6695 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, 6696 u64 addr, u64 seq, 6697 unsigned flags) 6698 { 6699 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6700 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6701 6702 /* RELEASE_MEM - flush caches, send int */ 6703 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5)); 6704 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6705 EOP_TC_ACTION_EN | 6706 EOP_TC_WB_ACTION_EN | 6707 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6708 EVENT_INDEX(5))); 6709 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6710 amdgpu_ring_write(ring, addr & 0xfffffffc); 6711 amdgpu_ring_write(ring, upper_32_bits(addr)); 6712 amdgpu_ring_write(ring, lower_32_bits(seq)); 6713 amdgpu_ring_write(ring, upper_32_bits(seq)); 6714 } 6715 6716 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 6717 u64 seq, unsigned int flags) 6718 { 6719 /* we only allocate 32 bits for each seq wb address */ 6720 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 6721 6722 /* write fence seq to the "addr" */ 6723 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6724 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6725 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 6726 amdgpu_ring_write(ring, lower_32_bits(addr)); 6727 amdgpu_ring_write(ring, upper_32_bits(addr)); 6728 amdgpu_ring_write(ring, lower_32_bits(seq)); 6729 6730 if (flags & AMDGPU_FENCE_FLAG_INT) { 6731 /* set register to trigger INT */ 6732 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6733 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6734 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 6735 amdgpu_ring_write(ring, mmCPC_INT_STATUS); 6736 amdgpu_ring_write(ring, 0); 6737 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 6738 } 6739 } 6740 6741 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) 6742 { 6743 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 6744 amdgpu_ring_write(ring, 0); 6745 } 6746 6747 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 6748 { 6749 uint32_t dw2 = 0; 6750 6751 if (amdgpu_sriov_vf(ring->adev)) 6752 gfx_v8_0_ring_emit_ce_meta_init(ring, 6753 (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr); 6754 6755 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */ 6756 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 6757 gfx_v8_0_ring_emit_vgt_flush(ring); 6758 /* set load_global_config & load_global_uconfig */ 6759 dw2 |= 0x8001; 6760 /* set load_cs_sh_regs */ 6761 dw2 |= 0x01000000; 6762 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 6763 dw2 |= 0x10002; 6764 6765 /* set load_ce_ram if a preamble is present */ 6766 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 6767 dw2 |= 0x10000000; 6768 } else { 6769 /* still load_ce_ram if this is the first time a preamble is presented, 6770 * even though no context switch happens. 6771 */ 6772 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 6773 dw2 |= 0x10000000; 6774 } 6775 6776 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 6777 amdgpu_ring_write(ring, dw2); 6778 amdgpu_ring_write(ring, 0); 6779 6780 if (amdgpu_sriov_vf(ring->adev)) 6781 gfx_v8_0_ring_emit_de_meta_init(ring, 6782 (flags & AMDGPU_VM_DOMAIN) ?
AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr); 6783 } 6784 6785 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 6786 { 6787 unsigned ret; 6788 6789 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 6790 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 6791 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 6792 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 6793 ret = ring->wptr & ring->buf_mask; 6794 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 6795 return ret; 6796 } 6797 6798 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 6799 { 6800 unsigned cur; 6801 6802 BUG_ON(offset > ring->buf_mask); 6803 BUG_ON(ring->ring[offset] != 0x55aa55aa); 6804 6805 cur = (ring->wptr & ring->buf_mask) - 1; 6806 if (likely(cur > offset)) 6807 ring->ring[offset] = cur - offset; 6808 else 6809 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; 6810 } 6811 6812 6813 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 6814 { 6815 struct amdgpu_device *adev = ring->adev; 6816 6817 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6818 amdgpu_ring_write(ring, 0 | /* src: register */ 6819 (5 << 8) | /* dst: memory */ 6820 (1 << 20)); /* write confirm */ 6821 amdgpu_ring_write(ring, reg); 6822 amdgpu_ring_write(ring, 0); 6823 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 6824 adev->virt.reg_val_offs * 4)); 6825 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 6826 adev->virt.reg_val_offs * 4)); 6827 } 6828 6829 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6830 uint32_t val) 6831 { 6832 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6833 amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */ 6834 amdgpu_ring_write(ring, reg); 6835 amdgpu_ring_write(ring, 0); 6836 amdgpu_ring_write(ring, val); 6837 } 6838 6839 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6840 enum amdgpu_interrupt_state state) 6841 { 6842 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE, 6843 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6844 } 6845 6846 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6847 int me, int pipe, 6848 enum amdgpu_interrupt_state state) 6849 { 6850 /* 6851 * amdgpu controls only pipe 0 of MEC1. That's why this function only 6852 * handles the setting of interrupts for this specific pipe. All other 6853 * pipes' interrupts are set by amdkfd. 6854 */ 6855 6856 if (me == 1) { 6857 switch (pipe) { 6858 case 0: 6859 break; 6860 default: 6861 DRM_DEBUG("invalid pipe %d\n", pipe); 6862 return; 6863 } 6864 } else { 6865 DRM_DEBUG("invalid me %d\n", me); 6866 return; 6867 } 6868 6869 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 6870 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6871 } 6872 6873 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6874 struct amdgpu_irq_src *source, 6875 unsigned type, 6876 enum amdgpu_interrupt_state state) 6877 { 6878 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, 6879 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6880 6881 return 0; 6882 } 6883 6884 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6885 struct amdgpu_irq_src *source, 6886 unsigned type, 6887 enum amdgpu_interrupt_state state) 6888 { 6889 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, 6890 state == AMDGPU_IRQ_STATE_DISABLE ?
0 : 1); 6891 6892 return 0; 6893 } 6894 6895 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6896 struct amdgpu_irq_src *src, 6897 unsigned type, 6898 enum amdgpu_interrupt_state state) 6899 { 6900 switch (type) { 6901 case AMDGPU_CP_IRQ_GFX_EOP: 6902 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state); 6903 break; 6904 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6905 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6906 break; 6907 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6908 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6909 break; 6910 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6911 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6912 break; 6913 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6914 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6915 break; 6916 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6917 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6918 break; 6919 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6920 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6921 break; 6922 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6923 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6924 break; 6925 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6926 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6927 break; 6928 default: 6929 break; 6930 } 6931 return 0; 6932 } 6933 6934 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, 6935 struct amdgpu_irq_src *source, 6936 struct amdgpu_iv_entry *entry) 6937 { 6938 int i; 6939 u8 me_id, pipe_id, queue_id; 6940 struct amdgpu_ring *ring; 6941 6942 DRM_DEBUG("IH: CP EOP\n"); 6943 me_id = (entry->ring_id & 0x0c) >> 2; 6944 pipe_id = (entry->ring_id & 0x03) >> 0; 6945 queue_id = (entry->ring_id & 0x70) >> 4; 6946 6947 switch (me_id) { 6948 case 0: 6949 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6950 break; 6951 case 1: 6952 case 2: 6953 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6954 ring = &adev->gfx.compute_ring[i]; 6955 /* Per-queue interrupt is supported for MEC starting from VI. 6956 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
6957 */ 6958 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6959 amdgpu_fence_process(ring); 6960 } 6961 break; 6962 } 6963 return 0; 6964 } 6965 6966 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, 6967 struct amdgpu_irq_src *source, 6968 struct amdgpu_iv_entry *entry) 6969 { 6970 DRM_ERROR("Illegal register access in command stream\n"); 6971 schedule_work(&adev->reset_work); 6972 return 0; 6973 } 6974 6975 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, 6976 struct amdgpu_irq_src *source, 6977 struct amdgpu_iv_entry *entry) 6978 { 6979 DRM_ERROR("Illegal instruction in command stream\n"); 6980 schedule_work(&adev->reset_work); 6981 return 0; 6982 } 6983 6984 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 6985 struct amdgpu_irq_src *src, 6986 unsigned int type, 6987 enum amdgpu_interrupt_state state) 6988 { 6989 uint32_t tmp, target; 6990 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 6991 6992 BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ); 6993 6994 if (ring->me == 1) 6995 target = mmCP_ME1_PIPE0_INT_CNTL; 6996 else 6997 target = mmCP_ME2_PIPE0_INT_CNTL; 6998 target += ring->pipe; 6999 7000 switch (type) { 7001 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 7002 if (state == AMDGPU_IRQ_STATE_DISABLE) { 7003 tmp = RREG32(mmCPC_INT_CNTL); 7004 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 7005 GENERIC2_INT_ENABLE, 0); 7006 WREG32(mmCPC_INT_CNTL, tmp); 7007 7008 tmp = RREG32(target); 7009 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, 7010 GENERIC2_INT_ENABLE, 0); 7011 WREG32(target, tmp); 7012 } else { 7013 tmp = RREG32(mmCPC_INT_CNTL); 7014 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 7015 GENERIC2_INT_ENABLE, 1); 7016 WREG32(mmCPC_INT_CNTL, tmp); 7017 7018 tmp = RREG32(target); 7019 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, 7020 GENERIC2_INT_ENABLE, 1); 7021 WREG32(target, tmp); 7022 } 7023 break; 7024 default: 7025 BUG(); /* the KIQ only supports GENERIC2_INT for now */ 7026 break; 7027 } 7028 return 0; 7029 } 7030 7031 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev, 7032 struct amdgpu_irq_src *source, 7033 struct amdgpu_iv_entry *entry) 7034 { 7035 u8 me_id, pipe_id, queue_id; 7036 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 7037 7038 BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ); 7039 7040 me_id = (entry->ring_id & 0x0c) >> 2; 7041 pipe_id = (entry->ring_id & 0x03) >> 0; 7042 queue_id = (entry->ring_id & 0x70) >> 4; 7043 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", 7044 me_id, pipe_id, queue_id); 7045 7046 amdgpu_fence_process(ring); 7047 return 0; 7048 } 7049 7050 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { 7051 .name = "gfx_v8_0", 7052 .early_init = gfx_v8_0_early_init, 7053 .late_init = gfx_v8_0_late_init, 7054 .sw_init = gfx_v8_0_sw_init, 7055 .sw_fini = gfx_v8_0_sw_fini, 7056 .hw_init = gfx_v8_0_hw_init, 7057 .hw_fini = gfx_v8_0_hw_fini, 7058 .suspend = gfx_v8_0_suspend, 7059 .resume = gfx_v8_0_resume, 7060 .is_idle = gfx_v8_0_is_idle, 7061 .wait_for_idle = gfx_v8_0_wait_for_idle, 7062 .check_soft_reset = gfx_v8_0_check_soft_reset, 7063 .pre_soft_reset = gfx_v8_0_pre_soft_reset, 7064 .soft_reset = gfx_v8_0_soft_reset, 7065 .post_soft_reset = gfx_v8_0_post_soft_reset, 7066 .set_clockgating_state = gfx_v8_0_set_clockgating_state, 7067 .set_powergating_state = gfx_v8_0_set_powergating_state, 7068 .get_clockgating_state = gfx_v8_0_get_clockgating_state, 7069 }; 7070 7071 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 7072 .type = AMDGPU_RING_TYPE_GFX, 7073
.align_mask = 0xff, 7074 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7075 .support_64bit_ptrs = false, 7076 .get_rptr = gfx_v8_0_ring_get_rptr, 7077 .get_wptr = gfx_v8_0_ring_get_wptr_gfx, 7078 .set_wptr = gfx_v8_0_ring_set_wptr_gfx, 7079 .emit_frame_size = /* maximum 215 DWs if counting 16 IBs in */ 7080 5 + /* COND_EXEC */ 7081 7 + /* PIPELINE_SYNC */ 7082 19 + /* VM_FLUSH */ 7083 8 + /* FENCE for VM_FLUSH */ 7084 20 + /* GDS switch */ 7085 4 + /* double SWITCH_BUFFER, 7086 the first COND_EXEC jumps to the place just 7087 prior to this double SWITCH_BUFFER */ 7088 5 + /* COND_EXEC */ 7089 7 + /* HDP_flush */ 7090 4 + /* VGT_flush */ 7091 14 + /* CE_META */ 7092 31 + /* DE_META */ 7093 3 + /* CNTX_CTRL */ 7094 5 + /* HDP_INVL */ 7095 8 + 8 + /* FENCE x2 */ 7096 2, /* SWITCH_BUFFER */ 7097 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ 7098 .emit_ib = gfx_v8_0_ring_emit_ib_gfx, 7099 .emit_fence = gfx_v8_0_ring_emit_fence_gfx, 7100 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 7101 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 7102 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 7103 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 7104 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, 7105 .test_ring = gfx_v8_0_ring_test_ring, 7106 .test_ib = gfx_v8_0_ring_test_ib, 7107 .insert_nop = amdgpu_ring_insert_nop, 7108 .pad_ib = amdgpu_ring_generic_pad_ib, 7109 .emit_switch_buffer = gfx_v8_ring_emit_sb, 7110 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, 7111 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, 7112 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, 7113 }; 7114 7115 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 7116 .type = AMDGPU_RING_TYPE_COMPUTE, 7117 .align_mask = 0xff, 7118 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7119 .support_64bit_ptrs = false, 7120 .get_rptr = gfx_v8_0_ring_get_rptr, 7121 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 7122 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 7123 .emit_frame_size = 7124 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7125 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 7126 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */ 7127 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 7128 17 + /* gfx_v8_0_ring_emit_vm_flush */ 7129 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ 7130 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 7131 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 7132 .emit_fence = gfx_v8_0_ring_emit_fence_compute, 7133 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 7134 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 7135 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 7136 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 7137 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, 7138 .test_ring = gfx_v8_0_ring_test_ring, 7139 .test_ib = gfx_v8_0_ring_test_ib, 7140 .insert_nop = amdgpu_ring_insert_nop, 7141 .pad_ib = amdgpu_ring_generic_pad_ib, 7142 }; 7143 7144 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { 7145 .type = AMDGPU_RING_TYPE_KIQ, 7146 .align_mask = 0xff, 7147 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7148 .support_64bit_ptrs = false, 7149 .get_rptr = gfx_v8_0_ring_get_rptr, 7150 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 7151 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 7152 .emit_frame_size = 7153 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7154 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 7155 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */ 7156 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 7157 17 + /* gfx_v8_0_ring_emit_vm_flush */ 7158 7 + 7 + 7, /* 
gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7159 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 7160 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 7161 .emit_fence = gfx_v8_0_ring_emit_fence_kiq, 7162 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 7163 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, 7164 .test_ring = gfx_v8_0_ring_test_ring, 7165 .test_ib = gfx_v8_0_ring_test_ib, 7166 .insert_nop = amdgpu_ring_insert_nop, 7167 .pad_ib = amdgpu_ring_generic_pad_ib, 7168 .emit_rreg = gfx_v8_0_ring_emit_rreg, 7169 .emit_wreg = gfx_v8_0_ring_emit_wreg, 7170 }; 7171 7172 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) 7173 { 7174 int i; 7175 7176 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq; 7177 7178 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7179 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx; 7180 7181 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7182 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute; 7183 } 7184 7185 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = { 7186 .set = gfx_v8_0_set_eop_interrupt_state, 7187 .process = gfx_v8_0_eop_irq, 7188 }; 7189 7190 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = { 7191 .set = gfx_v8_0_set_priv_reg_fault_state, 7192 .process = gfx_v8_0_priv_reg_irq, 7193 }; 7194 7195 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = { 7196 .set = gfx_v8_0_set_priv_inst_fault_state, 7197 .process = gfx_v8_0_priv_inst_irq, 7198 }; 7199 7200 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = { 7201 .set = gfx_v8_0_kiq_set_interrupt_state, 7202 .process = gfx_v8_0_kiq_irq, 7203 }; 7204 7205 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) 7206 { 7207 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7208 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs; 7209 7210 adev->gfx.priv_reg_irq.num_types = 1; 7211 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs; 7212 7213 adev->gfx.priv_inst_irq.num_types = 1; 7214 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; 7215 7216 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; 7217 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs; 7218 } 7219 7220 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) 7221 { 7222 adev->gfx.rlc.funcs = &iceland_rlc_funcs; 7223 } 7224 7225 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) 7226 { 7227 /* init asic gds info */ 7228 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); 7229 adev->gds.gws.total_size = 64; 7230 adev->gds.oa.total_size = 16; 7231 7232 if (adev->gds.mem.total_size == 64 * 1024) { 7233 adev->gds.mem.gfx_partition_size = 4096; 7234 adev->gds.mem.cs_partition_size = 4096; 7235 7236 adev->gds.gws.gfx_partition_size = 4; 7237 adev->gds.gws.cs_partition_size = 4; 7238 7239 adev->gds.oa.gfx_partition_size = 4; 7240 adev->gds.oa.cs_partition_size = 1; 7241 } else { 7242 adev->gds.mem.gfx_partition_size = 1024; 7243 adev->gds.mem.cs_partition_size = 1024; 7244 7245 adev->gds.gws.gfx_partition_size = 16; 7246 adev->gds.gws.cs_partition_size = 16; 7247 7248 adev->gds.oa.gfx_partition_size = 4; 7249 adev->gds.oa.cs_partition_size = 4; 7250 } 7251 } 7252 7253 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 7254 u32 bitmap) 7255 { 7256 u32 data; 7257 7258 if (!bitmap) 7259 return; 7260 7261 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7262 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7263 7264
WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data); 7265 } 7266 7267 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) 7268 { 7269 u32 data, mask; 7270 7271 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | 7272 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); 7273 7274 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh); 7275 7276 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; 7277 } 7278 7279 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) 7280 { 7281 int i, j, k, counter, active_cu_number = 0; 7282 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 7283 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 7284 unsigned disable_masks[4 * 2]; 7285 7286 memset(cu_info, 0, sizeof(*cu_info)); 7287 7288 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); 7289 7290 mutex_lock(&adev->grbm_idx_mutex); 7291 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7292 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7293 mask = 1; 7294 ao_bitmap = 0; 7295 counter = 0; 7296 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 7297 if (i < 4 && j < 2) 7298 gfx_v8_0_set_user_cu_inactive_bitmap( 7299 adev, disable_masks[i * 2 + j]); 7300 bitmap = gfx_v8_0_get_cu_active_bitmap(adev); 7301 cu_info->bitmap[i][j] = bitmap; 7302 7303 for (k = 0; k < 16; k++) { 7304 if (bitmap & mask) { 7305 if (counter < 2) 7306 ao_bitmap |= mask; 7307 counter++; 7308 } 7309 mask <<= 1; 7310 } 7311 active_cu_number += counter; 7312 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 7313 } 7314 } 7315 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 7316 mutex_unlock(&adev->grbm_idx_mutex); 7317 7318 cu_info->number = active_cu_number; 7319 cu_info->ao_cu_mask = ao_cu_mask; 7320 } 7321 7322 const struct amdgpu_ip_block_version gfx_v8_0_ip_block = 7323 { 7324 .type = AMD_IP_BLOCK_TYPE_GFX, 7325 .major = 8, 7326 .minor = 0, 7327 .rev = 0, 7328 .funcs = &gfx_v8_0_ip_funcs, 7329 }; 7330 7331 const struct amdgpu_ip_block_version gfx_v8_1_ip_block = 7332 { 7333 .type = AMD_IP_BLOCK_TYPE_GFX, 7334 .major = 8, 7335 .minor = 1, 7336 .rev = 0, 7337 .funcs = &gfx_v8_0_ip_funcs, 7338 }; 7339 7340 static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) 7341 { 7342 uint64_t ce_payload_addr; 7343 int cnt_ce; 7344 static union { 7345 struct vi_ce_ib_state regular; 7346 struct vi_ce_ib_state_chained_ib chained; 7347 } ce_payload = {}; 7348 7349 if (ring->adev->virt.chained_ib_support) { 7350 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); 7351 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; 7352 } else { 7353 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload); 7354 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; 7355 } 7356 7357 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce)); 7358 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 7359 WRITE_DATA_DST_SEL(8) | 7360 WR_CONFIRM) | 7361 WRITE_DATA_CACHE_POLICY(0)); 7362 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr)); 7363 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr)); 7364 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2); 7365 } 7366 7367 static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) 7368 { 7369 uint64_t de_payload_addr, gds_addr; 7370 int cnt_de; 7371 static union { 7372 struct vi_de_ib_state regular; 7373 struct vi_de_ib_state_chained_ib chained; 7374 } de_payload = {}; 7375 7376 gds_addr = csa_addr + 4096; 7377 if 
(ring->adev->virt.chained_ib_support) { 7378 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); 7379 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr); 7380 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload); 7381 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2; 7382 } else { 7383 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr); 7384 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr); 7385 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload); 7386 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2; 7387 } 7388 7389 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de)); 7390 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 7391 WRITE_DATA_DST_SEL(8) | 7392 WR_CONFIRM) | 7393 WRITE_DATA_CACHE_POLICY(0)); 7394 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr)); 7395 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); 7396 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2); 7397 } 7398 7399 /* create MQD for each compute queue */ 7400 static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev) 7401 { 7402 struct amdgpu_ring *ring = NULL; 7403 int r, i; 7404 7405 /* create MQD for KIQ */ 7406 ring = &adev->gfx.kiq.ring; 7407 if (!ring->mqd_obj) { 7408 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, 7409 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, 7410 &ring->mqd_gpu_addr, &ring->mqd_ptr); 7411 if (r) { 7412 dev_warn(adev->dev, "failed to create ring mqd bo (%d)\n", r); 7413 return r; 7414 } 7415 7416 /* prepare MQD backup */ 7417 adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); 7418 if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) 7419 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); 7420 } 7421 7422 /* create MQD for each KCQ */ 7423 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 7424 ring = &adev->gfx.compute_ring[i]; 7425 if (!ring->mqd_obj) { 7426 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, 7427 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, 7428 &ring->mqd_gpu_addr, &ring->mqd_ptr); 7429 if (r) { 7430 dev_warn(adev->dev, "failed to create ring mqd bo (%d)\n", r); 7431 return r; 7432 } 7433 7434 /* prepare MQD backup */ 7435 adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); 7436 if (!adev->gfx.mec.mqd_backup[i]) 7437 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); 7438 } 7439 } 7440 7441 return 0; 7442 } 7443 7444 static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev) 7445 { 7446 struct amdgpu_ring *ring = NULL; 7447 int i; 7448 7449 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 7450 ring = &adev->gfx.compute_ring[i]; 7451 kfree(adev->gfx.mec.mqd_backup[i]); 7452 amdgpu_bo_free_kernel(&ring->mqd_obj, 7453 &ring->mqd_gpu_addr, 7454 &ring->mqd_ptr); 7455 } 7456 7457 ring = &adev->gfx.kiq.ring; 7458 kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); 7459 amdgpu_bo_free_kernel(&ring->mqd_obj, 7460 &ring->mqd_gpu_addr, 7461 &ring->mqd_ptr); 7462 } 7463
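
/*
 * Illustrative sketch, not part of the driver: one way the clockgating
 * entry point above could be exercised. The wrapper name and the
 * bool-to-state mapping are assumptions for illustration only; the real
 * callers live in the shared amdgpu IP-block management code.
 */
#if 0
static int example_gate_gfx_clocks(struct amdgpu_device *adev, bool gate)
{
	/* The IP handle for this block is the device itself; the helper
	 * routes to the per-ASIC update function (Tonga, Polaris, ...). */
	return gfx_v8_0_set_clockgating_state((void *)adev,
					      gate ? AMD_CG_STATE_GATE
						   : AMD_CG_STATE_UNGATE);
}
#endif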