1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 #include <linux/firmware.h> 24 #include "drmP.h" 25 #include "amdgpu.h" 26 #include "amdgpu_gfx.h" 27 #include "vi.h" 28 #include "vi_structs.h" 29 #include "vid.h" 30 #include "amdgpu_ucode.h" 31 #include "amdgpu_atombios.h" 32 #include "atombios_i2c.h" 33 #include "clearstate_vi.h" 34 35 #include "gmc/gmc_8_2_d.h" 36 #include "gmc/gmc_8_2_sh_mask.h" 37 38 #include "oss/oss_3_0_d.h" 39 #include "oss/oss_3_0_sh_mask.h" 40 41 #include "bif/bif_5_0_d.h" 42 #include "bif/bif_5_0_sh_mask.h" 43 44 #include "gca/gfx_8_0_d.h" 45 #include "gca/gfx_8_0_enum.h" 46 #include "gca/gfx_8_0_sh_mask.h" 47 #include "gca/gfx_8_0_enum.h" 48 49 #include "dce/dce_10_0_d.h" 50 #include "dce/dce_10_0_sh_mask.h" 51 52 #include "smu/smu_7_1_3_d.h" 53 54 #define GFX8_NUM_GFX_RINGS 1 55 #define GFX8_NUM_COMPUTE_RINGS 8 56 57 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 58 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 59 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002 60 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 61 62 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) 63 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT) 64 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT) 65 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT) 66 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT) 67 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT) 68 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT) 69 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT) 70 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT) 71 72 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L 73 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L 74 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L 75 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L 76 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L 77 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L 78 79 /* BPM SERDES CMD */ 80 #define SET_BPM_SERDES_CMD 1 81 #define CLE_BPM_SERDES_CMD 0 82 83 /* BPM Register Address*/ 84 enum { 85 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */ 86 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */ 87 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */ 88 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */ 89 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */ 90 BPM_REG_FGCG_MAX 91 }; 92 93 #define RLC_FormatDirectRegListLength 14 94 95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin"); 96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin"); 97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin"); 98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin"); 99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin"); 100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin"); 101 102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin"); 103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin"); 104 MODULE_FIRMWARE("amdgpu/stoney_me.bin"); 105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin"); 106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin"); 107 108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin"); 109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin"); 110 MODULE_FIRMWARE("amdgpu/tonga_me.bin"); 111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin"); 112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin"); 113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin"); 114 115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin"); 116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin"); 117 MODULE_FIRMWARE("amdgpu/topaz_me.bin"); 118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin"); 119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin"); 120 121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin"); 122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin"); 123 MODULE_FIRMWARE("amdgpu/fiji_me.bin"); 124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); 125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); 126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); 127 128 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); 129 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); 130 MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); 131 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); 132 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); 133 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); 134 135 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); 136 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); 137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin"); 138 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin"); 139 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); 140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); 141 142 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin"); 143 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin"); 144 MODULE_FIRMWARE("amdgpu/polaris12_me.bin"); 145 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin"); 146 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin"); 147 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin"); 148 149 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = 150 { 151 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, 152 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1}, 153 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2}, 154 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3}, 155 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4}, 156 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5}, 157 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6}, 158 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7}, 159 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8}, 160 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9}, 161 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10}, 162 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11}, 163 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12}, 164 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13}, 165 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14}, 166 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15} 167 }; 168 169 static const u32 golden_settings_tonga_a11[] = 170 { 171 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208, 172 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 173 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 174 mmGB_GPU_ID, 0x0000000f, 0x00000000, 175 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 176 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc, 177 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 178 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 179 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 180 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 181 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 182 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 183 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb, 184 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b, 185 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876, 186 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 187 }; 188 189 static const u32 tonga_golden_common_all[] = 190 { 191 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 192 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 193 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 194 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 195 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 196 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 197 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 198 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 199 }; 200 201 static const u32 tonga_mgcg_cgcg_init[] = 202 { 203 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 204 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 205 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 206 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 207 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 208 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 209 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 210 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 211 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 212 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 213 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 214 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 215 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 216 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 217 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 218 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 219 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 220 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 221 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 222 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 223 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 224 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 225 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 226 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 227 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 228 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 229 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 230 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 231 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 232 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 233 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 234 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 235 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 236 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 237 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 238 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 239 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 240 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 241 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 242 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 243 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 244 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 245 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 246 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 247 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 248 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 249 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 250 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 251 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 252 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 253 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 254 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 255 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 256 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 257 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 258 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 259 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 260 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 261 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 262 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 263 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 264 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 265 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 266 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 267 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 268 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 269 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 270 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 271 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 272 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 273 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 274 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 275 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 276 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 277 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 278 }; 279 280 static const u32 golden_settings_polaris11_a11[] = 281 { 282 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208, 283 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, 284 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 285 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 286 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 287 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 288 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 289 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 290 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 291 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 292 mmSQ_CONFIG, 0x07f80000, 0x01180000, 293 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 294 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 295 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3, 296 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 297 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210, 298 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 299 }; 300 301 static const u32 polaris11_golden_common_all[] = 302 { 303 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 304 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, 305 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 306 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 307 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 308 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 309 }; 310 311 static const u32 golden_settings_polaris10_a11[] = 312 { 313 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200, 314 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, 315 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, 316 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 317 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 318 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 319 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 320 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 321 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a, 322 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 323 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 324 mmSQ_CONFIG, 0x07f80000, 0x07180000, 325 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 326 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 327 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, 328 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 329 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 330 }; 331 332 static const u32 polaris10_golden_common_all[] = 333 { 334 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 335 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 336 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 337 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 338 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 339 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 340 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 341 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 342 }; 343 344 static const u32 fiji_golden_common_all[] = 345 { 346 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 347 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a, 348 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e, 349 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 350 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 351 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 352 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 353 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 354 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 355 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009, 356 }; 357 358 static const u32 golden_settings_fiji_a10[] = 359 { 360 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 361 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 362 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 363 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 364 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 365 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 366 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 367 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 368 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 369 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff, 370 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 371 }; 372 373 static const u32 fiji_mgcg_cgcg_init[] = 374 { 375 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 376 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 377 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 378 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 379 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 380 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 381 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 382 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 383 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 384 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 385 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 386 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 387 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 388 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 389 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 390 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 391 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 392 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 393 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 394 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 395 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 396 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 397 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 398 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 399 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 400 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 401 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 402 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 403 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 404 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 405 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 406 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 407 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 408 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 409 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 410 }; 411 412 static const u32 golden_settings_iceland_a11[] = 413 { 414 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 415 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 416 mmDB_DEBUG3, 0xc0000000, 0xc0000000, 417 mmGB_GPU_ID, 0x0000000f, 0x00000000, 418 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 419 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 420 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002, 421 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 422 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 423 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 424 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 425 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 426 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 427 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1, 428 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 429 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010, 430 }; 431 432 static const u32 iceland_golden_common_all[] = 433 { 434 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 435 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 436 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 437 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 438 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 439 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 440 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 441 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 442 }; 443 444 static const u32 iceland_mgcg_cgcg_init[] = 445 { 446 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 447 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 448 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 449 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 450 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100, 451 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100, 452 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100, 453 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 454 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 455 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 456 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 457 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 458 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 459 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 460 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 461 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 462 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 463 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 464 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 465 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 466 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 467 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 468 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100, 469 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 470 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 471 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 472 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 473 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 474 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 475 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 476 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 477 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 478 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 479 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 480 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 481 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 482 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 483 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 484 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 485 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 486 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 487 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 488 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 489 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 490 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 491 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 492 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 493 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 494 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 495 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 496 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 497 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 498 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 499 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 500 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 501 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 502 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 503 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 504 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 505 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 506 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 507 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 508 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 509 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 510 }; 511 512 static const u32 cz_golden_settings_a11[] = 513 { 514 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 515 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 516 mmGB_GPU_ID, 0x0000000f, 0x00000000, 517 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001, 518 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 519 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 520 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 521 mmTA_CNTL_AUX, 0x000f000f, 0x00010000, 522 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 523 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 524 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3, 525 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302 526 }; 527 528 static const u32 cz_golden_common_all[] = 529 { 530 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 531 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 532 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 533 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 534 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 535 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 536 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 537 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 538 }; 539 540 static const u32 cz_mgcg_cgcg_init[] = 541 { 542 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 543 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 544 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 545 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 546 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 547 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 548 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100, 549 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 550 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 551 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 552 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 553 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 554 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 555 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 556 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 557 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 558 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 559 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 560 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 561 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 562 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 563 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 564 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 565 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 566 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 567 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 568 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 569 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 570 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 571 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 572 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 573 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 574 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 575 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 576 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 577 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 578 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 579 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 580 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 581 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 582 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 583 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 584 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 585 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 586 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 587 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 588 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 589 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 590 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 591 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 592 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 593 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 594 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 595 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 596 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 597 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 598 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 599 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 600 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 601 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 602 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 603 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 604 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 605 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 606 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 607 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 608 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 609 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 610 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 611 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 612 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 613 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 614 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 615 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 616 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 617 }; 618 619 static const u32 stoney_golden_settings_a11[] = 620 { 621 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 622 mmGB_GPU_ID, 0x0000000f, 0x00000000, 623 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 624 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 625 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 626 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 627 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 628 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 629 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1, 630 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010, 631 }; 632 633 static const u32 stoney_golden_common_all[] = 634 { 635 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 636 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000, 637 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 638 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001, 639 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 640 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 641 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 642 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 643 }; 644 645 static const u32 stoney_mgcg_cgcg_init[] = 646 { 647 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 648 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 649 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 650 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 651 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200, 652 }; 653 654 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev); 655 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev); 656 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); 657 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); 658 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); 659 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); 660 static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr); 661 static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr); 662 static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev); 663 static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev); 664 665 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) 666 { 667 switch (adev->asic_type) { 668 case CHIP_TOPAZ: 669 amdgpu_program_register_sequence(adev, 670 iceland_mgcg_cgcg_init, 671 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init)); 672 amdgpu_program_register_sequence(adev, 673 golden_settings_iceland_a11, 674 (const u32)ARRAY_SIZE(golden_settings_iceland_a11)); 675 amdgpu_program_register_sequence(adev, 676 iceland_golden_common_all, 677 (const u32)ARRAY_SIZE(iceland_golden_common_all)); 678 break; 679 case CHIP_FIJI: 680 amdgpu_program_register_sequence(adev, 681 fiji_mgcg_cgcg_init, 682 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init)); 683 amdgpu_program_register_sequence(adev, 684 golden_settings_fiji_a10, 685 (const u32)ARRAY_SIZE(golden_settings_fiji_a10)); 686 amdgpu_program_register_sequence(adev, 687 fiji_golden_common_all, 688 (const u32)ARRAY_SIZE(fiji_golden_common_all)); 689 break; 690 691 case CHIP_TONGA: 692 amdgpu_program_register_sequence(adev, 693 tonga_mgcg_cgcg_init, 694 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init)); 695 amdgpu_program_register_sequence(adev, 696 golden_settings_tonga_a11, 697 (const u32)ARRAY_SIZE(golden_settings_tonga_a11)); 698 amdgpu_program_register_sequence(adev, 699 tonga_golden_common_all, 700 (const u32)ARRAY_SIZE(tonga_golden_common_all)); 701 break; 702 case CHIP_POLARIS11: 703 case CHIP_POLARIS12: 704 amdgpu_program_register_sequence(adev, 705 golden_settings_polaris11_a11, 706 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11)); 707 amdgpu_program_register_sequence(adev, 708 polaris11_golden_common_all, 709 (const u32)ARRAY_SIZE(polaris11_golden_common_all)); 710 break; 711 case CHIP_POLARIS10: 712 amdgpu_program_register_sequence(adev, 713 golden_settings_polaris10_a11, 714 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11)); 715 amdgpu_program_register_sequence(adev, 716 polaris10_golden_common_all, 717 (const u32)ARRAY_SIZE(polaris10_golden_common_all)); 718 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); 719 if (adev->pdev->revision == 0xc7 && 720 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) || 721 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) || 722 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) { 723 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD); 724 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0); 725 } 726 break; 727 case CHIP_CARRIZO: 728 amdgpu_program_register_sequence(adev, 729 cz_mgcg_cgcg_init, 730 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init)); 731 amdgpu_program_register_sequence(adev, 732 cz_golden_settings_a11, 733 (const u32)ARRAY_SIZE(cz_golden_settings_a11)); 734 amdgpu_program_register_sequence(adev, 735 cz_golden_common_all, 736 (const u32)ARRAY_SIZE(cz_golden_common_all)); 737 break; 738 case CHIP_STONEY: 739 amdgpu_program_register_sequence(adev, 740 stoney_mgcg_cgcg_init, 741 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init)); 742 amdgpu_program_register_sequence(adev, 743 stoney_golden_settings_a11, 744 (const u32)ARRAY_SIZE(stoney_golden_settings_a11)); 745 amdgpu_program_register_sequence(adev, 746 stoney_golden_common_all, 747 (const u32)ARRAY_SIZE(stoney_golden_common_all)); 748 break; 749 default: 750 break; 751 } 752 } 753 754 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev) 755 { 756 adev->gfx.scratch.num_reg = 7; 757 adev->gfx.scratch.reg_base = mmSCRATCH_REG0; 758 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; 759 } 760 761 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) 762 { 763 struct amdgpu_device *adev = ring->adev; 764 uint32_t scratch; 765 uint32_t tmp = 0; 766 unsigned i; 767 int r; 768 769 r = amdgpu_gfx_scratch_get(adev, &scratch); 770 if (r) { 771 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); 772 return r; 773 } 774 WREG32(scratch, 0xCAFEDEAD); 775 r = amdgpu_ring_alloc(ring, 3); 776 if (r) { 777 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 778 ring->idx, r); 779 amdgpu_gfx_scratch_free(adev, scratch); 780 return r; 781 } 782 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 783 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 784 amdgpu_ring_write(ring, 0xDEADBEEF); 785 amdgpu_ring_commit(ring); 786 787 for (i = 0; i < adev->usec_timeout; i++) { 788 tmp = RREG32(scratch); 789 if (tmp == 0xDEADBEEF) 790 break; 791 DRM_UDELAY(1); 792 } 793 if (i < adev->usec_timeout) { 794 DRM_INFO("ring test on %d succeeded in %d usecs\n", 795 ring->idx, i); 796 } else { 797 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", 798 ring->idx, scratch, tmp); 799 r = -EINVAL; 800 } 801 amdgpu_gfx_scratch_free(adev, scratch); 802 return r; 803 } 804 805 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 806 { 807 struct amdgpu_device *adev = ring->adev; 808 struct amdgpu_ib ib; 809 struct dma_fence *f = NULL; 810 uint32_t scratch; 811 uint32_t tmp = 0; 812 long r; 813 814 r = amdgpu_gfx_scratch_get(adev, &scratch); 815 if (r) { 816 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); 817 return r; 818 } 819 WREG32(scratch, 0xCAFEDEAD); 820 memset(&ib, 0, sizeof(ib)); 821 r = amdgpu_ib_get(adev, NULL, 256, &ib); 822 if (r) { 823 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 824 goto err1; 825 } 826 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 827 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); 828 ib.ptr[2] = 0xDEADBEEF; 829 ib.length_dw = 3; 830 831 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 832 if (r) 833 goto err2; 834 835 r = dma_fence_wait_timeout(f, false, timeout); 836 if (r == 0) { 837 DRM_ERROR("amdgpu: IB test timed out.\n"); 838 r = -ETIMEDOUT; 839 goto err2; 840 } else if (r < 0) { 841 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 842 goto err2; 843 } 844 tmp = RREG32(scratch); 845 if (tmp == 0xDEADBEEF) { 846 DRM_INFO("ib test on ring %d succeeded\n", ring->idx); 847 r = 0; 848 } else { 849 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", 850 scratch, tmp); 851 r = -EINVAL; 852 } 853 err2: 854 amdgpu_ib_free(adev, &ib, NULL); 855 dma_fence_put(f); 856 err1: 857 amdgpu_gfx_scratch_free(adev, scratch); 858 return r; 859 } 860 861 862 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) { 863 release_firmware(adev->gfx.pfp_fw); 864 adev->gfx.pfp_fw = NULL; 865 release_firmware(adev->gfx.me_fw); 866 adev->gfx.me_fw = NULL; 867 release_firmware(adev->gfx.ce_fw); 868 adev->gfx.ce_fw = NULL; 869 release_firmware(adev->gfx.rlc_fw); 870 adev->gfx.rlc_fw = NULL; 871 release_firmware(adev->gfx.mec_fw); 872 adev->gfx.mec_fw = NULL; 873 if ((adev->asic_type != CHIP_STONEY) && 874 (adev->asic_type != CHIP_TOPAZ)) 875 release_firmware(adev->gfx.mec2_fw); 876 adev->gfx.mec2_fw = NULL; 877 878 kfree(adev->gfx.rlc.register_list_format); 879 } 880 881 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) 882 { 883 const char *chip_name; 884 char fw_name[30]; 885 int err; 886 struct amdgpu_firmware_info *info = NULL; 887 const struct common_firmware_header *header = NULL; 888 const struct gfx_firmware_header_v1_0 *cp_hdr; 889 const struct rlc_firmware_header_v2_0 *rlc_hdr; 890 unsigned int *tmp = NULL, i; 891 892 DRM_DEBUG("\n"); 893 894 switch (adev->asic_type) { 895 case CHIP_TOPAZ: 896 chip_name = "topaz"; 897 break; 898 case CHIP_TONGA: 899 chip_name = "tonga"; 900 break; 901 case CHIP_CARRIZO: 902 chip_name = "carrizo"; 903 break; 904 case CHIP_FIJI: 905 chip_name = "fiji"; 906 break; 907 case CHIP_POLARIS11: 908 chip_name = "polaris11"; 909 break; 910 case CHIP_POLARIS10: 911 chip_name = "polaris10"; 912 break; 913 case CHIP_POLARIS12: 914 chip_name = "polaris12"; 915 break; 916 case CHIP_STONEY: 917 chip_name = "stoney"; 918 break; 919 default: 920 BUG(); 921 } 922 923 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 924 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 925 if (err) 926 goto out; 927 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 928 if (err) 929 goto out; 930 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 931 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 932 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 933 934 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 935 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 936 if (err) 937 goto out; 938 err = amdgpu_ucode_validate(adev->gfx.me_fw); 939 if (err) 940 goto out; 941 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 942 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 943 944 /* chain ib ucode isn't formal released, just disable it by far 945 * TODO: when ucod ready we should use ucode version to judge if 946 * chain-ib support or not. 947 */ 948 adev->virt.chained_ib_support = false; 949 950 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 951 952 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 953 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 954 if (err) 955 goto out; 956 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 957 if (err) 958 goto out; 959 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 960 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 961 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 962 963 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 964 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 965 if (err) 966 goto out; 967 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 968 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 969 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 970 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 971 972 adev->gfx.rlc.save_and_restore_offset = 973 le32_to_cpu(rlc_hdr->save_and_restore_offset); 974 adev->gfx.rlc.clear_state_descriptor_offset = 975 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 976 adev->gfx.rlc.avail_scratch_ram_locations = 977 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 978 adev->gfx.rlc.reg_restore_list_size = 979 le32_to_cpu(rlc_hdr->reg_restore_list_size); 980 adev->gfx.rlc.reg_list_format_start = 981 le32_to_cpu(rlc_hdr->reg_list_format_start); 982 adev->gfx.rlc.reg_list_format_separate_start = 983 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 984 adev->gfx.rlc.starting_offsets_start = 985 le32_to_cpu(rlc_hdr->starting_offsets_start); 986 adev->gfx.rlc.reg_list_format_size_bytes = 987 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 988 adev->gfx.rlc.reg_list_size_bytes = 989 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 990 991 adev->gfx.rlc.register_list_format = 992 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 993 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 994 995 if (!adev->gfx.rlc.register_list_format) { 996 err = -ENOMEM; 997 goto out; 998 } 999 1000 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1001 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 1002 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) 1003 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 1004 1005 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 1006 1007 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1008 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 1009 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) 1010 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 1011 1012 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1013 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1014 if (err) 1015 goto out; 1016 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 1017 if (err) 1018 goto out; 1019 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1020 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1021 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1022 1023 if ((adev->asic_type != CHIP_STONEY) && 1024 (adev->asic_type != CHIP_TOPAZ)) { 1025 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1026 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1027 if (!err) { 1028 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1029 if (err) 1030 goto out; 1031 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1032 adev->gfx.mec2_fw->data; 1033 adev->gfx.mec2_fw_version = 1034 le32_to_cpu(cp_hdr->header.ucode_version); 1035 adev->gfx.mec2_feature_version = 1036 le32_to_cpu(cp_hdr->ucode_feature_version); 1037 } else { 1038 err = 0; 1039 adev->gfx.mec2_fw = NULL; 1040 } 1041 } 1042 1043 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { 1044 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 1045 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 1046 info->fw = adev->gfx.pfp_fw; 1047 header = (const struct common_firmware_header *)info->fw->data; 1048 adev->firmware.fw_size += 1049 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1050 1051 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1052 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1053 info->fw = adev->gfx.me_fw; 1054 header = (const struct common_firmware_header *)info->fw->data; 1055 adev->firmware.fw_size += 1056 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1057 1058 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1059 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1060 info->fw = adev->gfx.ce_fw; 1061 header = (const struct common_firmware_header *)info->fw->data; 1062 adev->firmware.fw_size += 1063 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1064 1065 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1066 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1067 info->fw = adev->gfx.rlc_fw; 1068 header = (const struct common_firmware_header *)info->fw->data; 1069 adev->firmware.fw_size += 1070 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1071 1072 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1073 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1074 info->fw = adev->gfx.mec_fw; 1075 header = (const struct common_firmware_header *)info->fw->data; 1076 adev->firmware.fw_size += 1077 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1078 1079 /* we need account JT in */ 1080 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1081 adev->firmware.fw_size += 1082 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE); 1083 1084 if (amdgpu_sriov_vf(adev)) { 1085 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE]; 1086 info->ucode_id = AMDGPU_UCODE_ID_STORAGE; 1087 info->fw = adev->gfx.mec_fw; 1088 adev->firmware.fw_size += 1089 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE); 1090 } 1091 1092 if (adev->gfx.mec2_fw) { 1093 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1094 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1095 info->fw = adev->gfx.mec2_fw; 1096 header = (const struct common_firmware_header *)info->fw->data; 1097 adev->firmware.fw_size += 1098 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1099 } 1100 1101 } 1102 1103 out: 1104 if (err) { 1105 dev_err(adev->dev, 1106 "gfx8: Failed to load firmware \"%s\"\n", 1107 fw_name); 1108 release_firmware(adev->gfx.pfp_fw); 1109 adev->gfx.pfp_fw = NULL; 1110 release_firmware(adev->gfx.me_fw); 1111 adev->gfx.me_fw = NULL; 1112 release_firmware(adev->gfx.ce_fw); 1113 adev->gfx.ce_fw = NULL; 1114 release_firmware(adev->gfx.rlc_fw); 1115 adev->gfx.rlc_fw = NULL; 1116 release_firmware(adev->gfx.mec_fw); 1117 adev->gfx.mec_fw = NULL; 1118 release_firmware(adev->gfx.mec2_fw); 1119 adev->gfx.mec2_fw = NULL; 1120 } 1121 return err; 1122 } 1123 1124 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, 1125 volatile u32 *buffer) 1126 { 1127 u32 count = 0, i; 1128 const struct cs_section_def *sect = NULL; 1129 const struct cs_extent_def *ext = NULL; 1130 1131 if (adev->gfx.rlc.cs_data == NULL) 1132 return; 1133 if (buffer == NULL) 1134 return; 1135 1136 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1137 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1138 1139 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1140 buffer[count++] = cpu_to_le32(0x80000000); 1141 buffer[count++] = cpu_to_le32(0x80000000); 1142 1143 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1144 for (ext = sect->section; ext->extent != NULL; ++ext) { 1145 if (sect->id == SECT_CONTEXT) { 1146 buffer[count++] = 1147 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1148 buffer[count++] = cpu_to_le32(ext->reg_index - 1149 PACKET3_SET_CONTEXT_REG_START); 1150 for (i = 0; i < ext->reg_count; i++) 1151 buffer[count++] = cpu_to_le32(ext->extent[i]); 1152 } else { 1153 return; 1154 } 1155 } 1156 } 1157 1158 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 1159 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - 1160 PACKET3_SET_CONTEXT_REG_START); 1161 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); 1162 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1); 1163 1164 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1165 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1166 1167 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1168 buffer[count++] = cpu_to_le32(0); 1169 } 1170 1171 static void cz_init_cp_jump_table(struct amdgpu_device *adev) 1172 { 1173 const __le32 *fw_data; 1174 volatile u32 *dst_ptr; 1175 int me, i, max_me = 4; 1176 u32 bo_offset = 0; 1177 u32 table_offset, table_size; 1178 1179 if (adev->asic_type == CHIP_CARRIZO) 1180 max_me = 5; 1181 1182 /* write the cp table buffer */ 1183 dst_ptr = adev->gfx.rlc.cp_table_ptr; 1184 for (me = 0; me < max_me; me++) { 1185 if (me == 0) { 1186 const struct gfx_firmware_header_v1_0 *hdr = 1187 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1188 fw_data = (const __le32 *) 1189 (adev->gfx.ce_fw->data + 1190 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1191 table_offset = le32_to_cpu(hdr->jt_offset); 1192 table_size = le32_to_cpu(hdr->jt_size); 1193 } else if (me == 1) { 1194 const struct gfx_firmware_header_v1_0 *hdr = 1195 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1196 fw_data = (const __le32 *) 1197 (adev->gfx.pfp_fw->data + 1198 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1199 table_offset = le32_to_cpu(hdr->jt_offset); 1200 table_size = le32_to_cpu(hdr->jt_size); 1201 } else if (me == 2) { 1202 const struct gfx_firmware_header_v1_0 *hdr = 1203 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1204 fw_data = (const __le32 *) 1205 (adev->gfx.me_fw->data + 1206 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1207 table_offset = le32_to_cpu(hdr->jt_offset); 1208 table_size = le32_to_cpu(hdr->jt_size); 1209 } else if (me == 3) { 1210 const struct gfx_firmware_header_v1_0 *hdr = 1211 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1212 fw_data = (const __le32 *) 1213 (adev->gfx.mec_fw->data + 1214 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1215 table_offset = le32_to_cpu(hdr->jt_offset); 1216 table_size = le32_to_cpu(hdr->jt_size); 1217 } else if (me == 4) { 1218 const struct gfx_firmware_header_v1_0 *hdr = 1219 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 1220 fw_data = (const __le32 *) 1221 (adev->gfx.mec2_fw->data + 1222 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1223 table_offset = le32_to_cpu(hdr->jt_offset); 1224 table_size = le32_to_cpu(hdr->jt_size); 1225 } 1226 1227 for (i = 0; i < table_size; i ++) { 1228 dst_ptr[bo_offset + i] = 1229 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); 1230 } 1231 1232 bo_offset += table_size; 1233 } 1234 } 1235 1236 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) 1237 { 1238 int r; 1239 1240 /* clear state block */ 1241 if (adev->gfx.rlc.clear_state_obj) { 1242 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); 1243 if (unlikely(r != 0)) 1244 dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r); 1245 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); 1246 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1247 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 1248 adev->gfx.rlc.clear_state_obj = NULL; 1249 } 1250 1251 /* jump table block */ 1252 if (adev->gfx.rlc.cp_table_obj) { 1253 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true); 1254 if (unlikely(r != 0)) 1255 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); 1256 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj); 1257 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); 1258 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj); 1259 adev->gfx.rlc.cp_table_obj = NULL; 1260 } 1261 } 1262 1263 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) 1264 { 1265 volatile u32 *dst_ptr; 1266 u32 dws; 1267 const struct cs_section_def *cs_data; 1268 int r; 1269 1270 adev->gfx.rlc.cs_data = vi_cs_data; 1271 1272 cs_data = adev->gfx.rlc.cs_data; 1273 1274 if (cs_data) { 1275 /* clear state block */ 1276 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); 1277 1278 if (adev->gfx.rlc.clear_state_obj == NULL) { 1279 r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, 1280 AMDGPU_GEM_DOMAIN_VRAM, 1281 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | 1282 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, 1283 NULL, NULL, 1284 &adev->gfx.rlc.clear_state_obj); 1285 if (r) { 1286 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); 1287 gfx_v8_0_rlc_fini(adev); 1288 return r; 1289 } 1290 } 1291 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 1292 if (unlikely(r != 0)) { 1293 gfx_v8_0_rlc_fini(adev); 1294 return r; 1295 } 1296 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM, 1297 &adev->gfx.rlc.clear_state_gpu_addr); 1298 if (r) { 1299 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1300 dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r); 1301 gfx_v8_0_rlc_fini(adev); 1302 return r; 1303 } 1304 1305 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr); 1306 if (r) { 1307 dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r); 1308 gfx_v8_0_rlc_fini(adev); 1309 return r; 1310 } 1311 /* set up the cs buffer */ 1312 dst_ptr = adev->gfx.rlc.cs_ptr; 1313 gfx_v8_0_get_csb_buffer(adev, dst_ptr); 1314 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); 1315 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1316 } 1317 1318 if ((adev->asic_type == CHIP_CARRIZO) || 1319 (adev->asic_type == CHIP_STONEY)) { 1320 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1321 if (adev->gfx.rlc.cp_table_obj == NULL) { 1322 r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true, 1323 AMDGPU_GEM_DOMAIN_VRAM, 1324 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | 1325 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, 1326 NULL, NULL, 1327 &adev->gfx.rlc.cp_table_obj); 1328 if (r) { 1329 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); 1330 return r; 1331 } 1332 } 1333 1334 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false); 1335 if (unlikely(r != 0)) { 1336 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); 1337 return r; 1338 } 1339 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM, 1340 &adev->gfx.rlc.cp_table_gpu_addr); 1341 if (r) { 1342 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); 1343 dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r); 1344 return r; 1345 } 1346 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr); 1347 if (r) { 1348 dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r); 1349 return r; 1350 } 1351 1352 cz_init_cp_jump_table(adev); 1353 1354 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); 1355 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); 1356 } 1357 1358 return 0; 1359 } 1360 1361 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) 1362 { 1363 int r; 1364 1365 if (adev->gfx.mec.hpd_eop_obj) { 1366 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true); 1367 if (unlikely(r != 0)) 1368 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); 1369 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); 1370 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1371 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); 1372 adev->gfx.mec.hpd_eop_obj = NULL; 1373 } 1374 } 1375 1376 static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, 1377 struct amdgpu_ring *ring, 1378 struct amdgpu_irq_src *irq) 1379 { 1380 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 1381 int r = 0; 1382 1383 r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); 1384 if (r) 1385 return r; 1386 1387 ring->adev = NULL; 1388 ring->ring_obj = NULL; 1389 ring->use_doorbell = true; 1390 ring->doorbell_index = AMDGPU_DOORBELL_KIQ; 1391 if (adev->gfx.mec2_fw) { 1392 ring->me = 2; 1393 ring->pipe = 0; 1394 } else { 1395 ring->me = 1; 1396 ring->pipe = 1; 1397 } 1398 1399 ring->queue = 0; 1400 ring->eop_gpu_addr = kiq->eop_gpu_addr; 1401 sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); 1402 r = amdgpu_ring_init(adev, ring, 1024, 1403 irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); 1404 if (r) 1405 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); 1406 1407 return r; 1408 } 1409 static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, 1410 struct amdgpu_irq_src *irq) 1411 { 1412 amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); 1413 amdgpu_ring_fini(ring); 1414 } 1415 1416 #define MEC_HPD_SIZE 2048 1417 1418 static int gfx_v8_0_mec_init(struct amdgpu_device *adev) 1419 { 1420 int r; 1421 u32 *hpd; 1422 1423 /* 1424 * we assign only 1 pipe because all other pipes will 1425 * be handled by KFD 1426 */ 1427 adev->gfx.mec.num_mec = 1; 1428 adev->gfx.mec.num_pipe = 1; 1429 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; 1430 1431 if (adev->gfx.mec.hpd_eop_obj == NULL) { 1432 r = amdgpu_bo_create(adev, 1433 adev->gfx.mec.num_queue * MEC_HPD_SIZE, 1434 PAGE_SIZE, true, 1435 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, 1436 &adev->gfx.mec.hpd_eop_obj); 1437 if (r) { 1438 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1439 return r; 1440 } 1441 } 1442 1443 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); 1444 if (unlikely(r != 0)) { 1445 gfx_v8_0_mec_fini(adev); 1446 return r; 1447 } 1448 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT, 1449 &adev->gfx.mec.hpd_eop_gpu_addr); 1450 if (r) { 1451 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r); 1452 gfx_v8_0_mec_fini(adev); 1453 return r; 1454 } 1455 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); 1456 if (r) { 1457 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); 1458 gfx_v8_0_mec_fini(adev); 1459 return r; 1460 } 1461 1462 memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE); 1463 1464 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1465 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1466 1467 return 0; 1468 } 1469 1470 static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) 1471 { 1472 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 1473 1474 amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); 1475 } 1476 1477 static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) 1478 { 1479 int r; 1480 u32 *hpd; 1481 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 1482 1483 r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, 1484 AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, 1485 &kiq->eop_gpu_addr, (void **)&hpd); 1486 if (r) { 1487 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); 1488 return r; 1489 } 1490 1491 memset(hpd, 0, MEC_HPD_SIZE); 1492 1493 r = amdgpu_bo_reserve(kiq->eop_obj, true); 1494 if (unlikely(r != 0)) 1495 dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); 1496 amdgpu_bo_kunmap(kiq->eop_obj); 1497 amdgpu_bo_unreserve(kiq->eop_obj); 1498 1499 return 0; 1500 } 1501 1502 static const u32 vgpr_init_compute_shader[] = 1503 { 1504 0x7e000209, 0x7e020208, 1505 0x7e040207, 0x7e060206, 1506 0x7e080205, 0x7e0a0204, 1507 0x7e0c0203, 0x7e0e0202, 1508 0x7e100201, 0x7e120200, 1509 0x7e140209, 0x7e160208, 1510 0x7e180207, 0x7e1a0206, 1511 0x7e1c0205, 0x7e1e0204, 1512 0x7e200203, 0x7e220202, 1513 0x7e240201, 0x7e260200, 1514 0x7e280209, 0x7e2a0208, 1515 0x7e2c0207, 0x7e2e0206, 1516 0x7e300205, 0x7e320204, 1517 0x7e340203, 0x7e360202, 1518 0x7e380201, 0x7e3a0200, 1519 0x7e3c0209, 0x7e3e0208, 1520 0x7e400207, 0x7e420206, 1521 0x7e440205, 0x7e460204, 1522 0x7e480203, 0x7e4a0202, 1523 0x7e4c0201, 0x7e4e0200, 1524 0x7e500209, 0x7e520208, 1525 0x7e540207, 0x7e560206, 1526 0x7e580205, 0x7e5a0204, 1527 0x7e5c0203, 0x7e5e0202, 1528 0x7e600201, 0x7e620200, 1529 0x7e640209, 0x7e660208, 1530 0x7e680207, 0x7e6a0206, 1531 0x7e6c0205, 0x7e6e0204, 1532 0x7e700203, 0x7e720202, 1533 0x7e740201, 0x7e760200, 1534 0x7e780209, 0x7e7a0208, 1535 0x7e7c0207, 0x7e7e0206, 1536 0xbf8a0000, 0xbf810000, 1537 }; 1538 1539 static const u32 sgpr_init_compute_shader[] = 1540 { 1541 0xbe8a0100, 0xbe8c0102, 1542 0xbe8e0104, 0xbe900106, 1543 0xbe920108, 0xbe940100, 1544 0xbe960102, 0xbe980104, 1545 0xbe9a0106, 0xbe9c0108, 1546 0xbe9e0100, 0xbea00102, 1547 0xbea20104, 0xbea40106, 1548 0xbea60108, 0xbea80100, 1549 0xbeaa0102, 0xbeac0104, 1550 0xbeae0106, 0xbeb00108, 1551 0xbeb20100, 0xbeb40102, 1552 0xbeb60104, 0xbeb80106, 1553 0xbeba0108, 0xbebc0100, 1554 0xbebe0102, 0xbec00104, 1555 0xbec20106, 0xbec40108, 1556 0xbec60100, 0xbec80102, 1557 0xbee60004, 0xbee70005, 1558 0xbeea0006, 0xbeeb0007, 1559 0xbee80008, 0xbee90009, 1560 0xbefc0000, 0xbf8a0000, 1561 0xbf810000, 0x00000000, 1562 }; 1563 1564 static const u32 vgpr_init_regs[] = 1565 { 1566 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff, 1567 mmCOMPUTE_RESOURCE_LIMITS, 0, 1568 mmCOMPUTE_NUM_THREAD_X, 256*4, 1569 mmCOMPUTE_NUM_THREAD_Y, 1, 1570 mmCOMPUTE_NUM_THREAD_Z, 1, 1571 mmCOMPUTE_PGM_RSRC2, 20, 1572 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1573 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1574 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1575 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1576 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1577 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1578 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1579 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1580 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1581 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1582 }; 1583 1584 static const u32 sgpr1_init_regs[] = 1585 { 1586 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f, 1587 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, 1588 mmCOMPUTE_NUM_THREAD_X, 256*5, 1589 mmCOMPUTE_NUM_THREAD_Y, 1, 1590 mmCOMPUTE_NUM_THREAD_Z, 1, 1591 mmCOMPUTE_PGM_RSRC2, 20, 1592 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1593 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1594 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1595 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1596 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1597 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1598 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1599 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1600 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1601 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1602 }; 1603 1604 static const u32 sgpr2_init_regs[] = 1605 { 1606 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0, 1607 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, 1608 mmCOMPUTE_NUM_THREAD_X, 256*5, 1609 mmCOMPUTE_NUM_THREAD_Y, 1, 1610 mmCOMPUTE_NUM_THREAD_Z, 1, 1611 mmCOMPUTE_PGM_RSRC2, 20, 1612 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1613 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1614 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1615 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1616 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1617 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1618 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1619 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1620 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1621 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1622 }; 1623 1624 static const u32 sec_ded_counter_registers[] = 1625 { 1626 mmCPC_EDC_ATC_CNT, 1627 mmCPC_EDC_SCRATCH_CNT, 1628 mmCPC_EDC_UCODE_CNT, 1629 mmCPF_EDC_ATC_CNT, 1630 mmCPF_EDC_ROQ_CNT, 1631 mmCPF_EDC_TAG_CNT, 1632 mmCPG_EDC_ATC_CNT, 1633 mmCPG_EDC_DMA_CNT, 1634 mmCPG_EDC_TAG_CNT, 1635 mmDC_EDC_CSINVOC_CNT, 1636 mmDC_EDC_RESTORE_CNT, 1637 mmDC_EDC_STATE_CNT, 1638 mmGDS_EDC_CNT, 1639 mmGDS_EDC_GRBM_CNT, 1640 mmGDS_EDC_OA_DED, 1641 mmSPI_EDC_CNT, 1642 mmSQC_ATC_EDC_GATCL1_CNT, 1643 mmSQC_EDC_CNT, 1644 mmSQ_EDC_DED_CNT, 1645 mmSQ_EDC_INFO, 1646 mmSQ_EDC_SEC_CNT, 1647 mmTCC_EDC_CNT, 1648 mmTCP_ATC_EDC_GATCL1_CNT, 1649 mmTCP_EDC_CNT, 1650 mmTD_EDC_CNT 1651 }; 1652 1653 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 1654 { 1655 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 1656 struct amdgpu_ib ib; 1657 struct dma_fence *f = NULL; 1658 int r, i; 1659 u32 tmp; 1660 unsigned total_size, vgpr_offset, sgpr_offset; 1661 u64 gpu_addr; 1662 1663 /* only supported on CZ */ 1664 if (adev->asic_type != CHIP_CARRIZO) 1665 return 0; 1666 1667 /* bail if the compute ring is not ready */ 1668 if (!ring->ready) 1669 return 0; 1670 1671 tmp = RREG32(mmGB_EDC_MODE); 1672 WREG32(mmGB_EDC_MODE, 0); 1673 1674 total_size = 1675 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1676 total_size += 1677 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1678 total_size += 1679 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1680 total_size = ALIGN(total_size, 256); 1681 vgpr_offset = total_size; 1682 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 1683 sgpr_offset = total_size; 1684 total_size += sizeof(sgpr_init_compute_shader); 1685 1686 /* allocate an indirect buffer to put the commands in */ 1687 memset(&ib, 0, sizeof(ib)); 1688 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 1689 if (r) { 1690 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 1691 return r; 1692 } 1693 1694 /* load the compute shaders */ 1695 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 1696 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 1697 1698 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 1699 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 1700 1701 /* init the ib length to 0 */ 1702 ib.length_dw = 0; 1703 1704 /* VGPR */ 1705 /* write the register state for the compute dispatch */ 1706 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) { 1707 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1708 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START; 1709 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1]; 1710 } 1711 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1712 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 1713 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1714 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1715 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1716 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1717 1718 /* write dispatch packet */ 1719 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1720 ib.ptr[ib.length_dw++] = 8; /* x */ 1721 ib.ptr[ib.length_dw++] = 1; /* y */ 1722 ib.ptr[ib.length_dw++] = 1; /* z */ 1723 ib.ptr[ib.length_dw++] = 1724 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1725 1726 /* write CS partial flush packet */ 1727 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1728 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1729 1730 /* SGPR1 */ 1731 /* write the register state for the compute dispatch */ 1732 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) { 1733 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1734 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START; 1735 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1]; 1736 } 1737 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1738 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1739 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1740 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1741 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1742 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1743 1744 /* write dispatch packet */ 1745 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1746 ib.ptr[ib.length_dw++] = 8; /* x */ 1747 ib.ptr[ib.length_dw++] = 1; /* y */ 1748 ib.ptr[ib.length_dw++] = 1; /* z */ 1749 ib.ptr[ib.length_dw++] = 1750 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1751 1752 /* write CS partial flush packet */ 1753 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1754 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1755 1756 /* SGPR2 */ 1757 /* write the register state for the compute dispatch */ 1758 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) { 1759 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1760 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START; 1761 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1]; 1762 } 1763 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1764 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1765 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1766 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1767 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1768 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1769 1770 /* write dispatch packet */ 1771 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1772 ib.ptr[ib.length_dw++] = 8; /* x */ 1773 ib.ptr[ib.length_dw++] = 1; /* y */ 1774 ib.ptr[ib.length_dw++] = 1; /* z */ 1775 ib.ptr[ib.length_dw++] = 1776 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1777 1778 /* write CS partial flush packet */ 1779 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1780 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1781 1782 /* shedule the ib on the ring */ 1783 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 1784 if (r) { 1785 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 1786 goto fail; 1787 } 1788 1789 /* wait for the GPU to finish processing the IB */ 1790 r = dma_fence_wait(f, false); 1791 if (r) { 1792 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 1793 goto fail; 1794 } 1795 1796 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2); 1797 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1); 1798 WREG32(mmGB_EDC_MODE, tmp); 1799 1800 tmp = RREG32(mmCC_GC_EDC_CONFIG); 1801 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1; 1802 WREG32(mmCC_GC_EDC_CONFIG, tmp); 1803 1804 1805 /* read back registers to clear the counters */ 1806 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) 1807 RREG32(sec_ded_counter_registers[i]); 1808 1809 fail: 1810 amdgpu_ib_free(adev, &ib, NULL); 1811 dma_fence_put(f); 1812 1813 return r; 1814 } 1815 1816 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) 1817 { 1818 u32 gb_addr_config; 1819 u32 mc_shared_chmap, mc_arb_ramcfg; 1820 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; 1821 u32 tmp; 1822 int ret; 1823 1824 switch (adev->asic_type) { 1825 case CHIP_TOPAZ: 1826 adev->gfx.config.max_shader_engines = 1; 1827 adev->gfx.config.max_tile_pipes = 2; 1828 adev->gfx.config.max_cu_per_sh = 6; 1829 adev->gfx.config.max_sh_per_se = 1; 1830 adev->gfx.config.max_backends_per_se = 2; 1831 adev->gfx.config.max_texture_channel_caches = 2; 1832 adev->gfx.config.max_gprs = 256; 1833 adev->gfx.config.max_gs_threads = 32; 1834 adev->gfx.config.max_hw_contexts = 8; 1835 1836 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1837 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1838 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1839 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1840 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN; 1841 break; 1842 case CHIP_FIJI: 1843 adev->gfx.config.max_shader_engines = 4; 1844 adev->gfx.config.max_tile_pipes = 16; 1845 adev->gfx.config.max_cu_per_sh = 16; 1846 adev->gfx.config.max_sh_per_se = 1; 1847 adev->gfx.config.max_backends_per_se = 4; 1848 adev->gfx.config.max_texture_channel_caches = 16; 1849 adev->gfx.config.max_gprs = 256; 1850 adev->gfx.config.max_gs_threads = 32; 1851 adev->gfx.config.max_hw_contexts = 8; 1852 1853 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1854 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1855 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1856 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1857 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1858 break; 1859 case CHIP_POLARIS11: 1860 case CHIP_POLARIS12: 1861 ret = amdgpu_atombios_get_gfx_info(adev); 1862 if (ret) 1863 return ret; 1864 adev->gfx.config.max_gprs = 256; 1865 adev->gfx.config.max_gs_threads = 32; 1866 adev->gfx.config.max_hw_contexts = 8; 1867 1868 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1869 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1870 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1871 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1872 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; 1873 break; 1874 case CHIP_POLARIS10: 1875 ret = amdgpu_atombios_get_gfx_info(adev); 1876 if (ret) 1877 return ret; 1878 adev->gfx.config.max_gprs = 256; 1879 adev->gfx.config.max_gs_threads = 32; 1880 adev->gfx.config.max_hw_contexts = 8; 1881 1882 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1883 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1884 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1885 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1886 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1887 break; 1888 case CHIP_TONGA: 1889 adev->gfx.config.max_shader_engines = 4; 1890 adev->gfx.config.max_tile_pipes = 8; 1891 adev->gfx.config.max_cu_per_sh = 8; 1892 adev->gfx.config.max_sh_per_se = 1; 1893 adev->gfx.config.max_backends_per_se = 2; 1894 adev->gfx.config.max_texture_channel_caches = 8; 1895 adev->gfx.config.max_gprs = 256; 1896 adev->gfx.config.max_gs_threads = 32; 1897 adev->gfx.config.max_hw_contexts = 8; 1898 1899 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1900 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1901 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1902 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1903 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1904 break; 1905 case CHIP_CARRIZO: 1906 adev->gfx.config.max_shader_engines = 1; 1907 adev->gfx.config.max_tile_pipes = 2; 1908 adev->gfx.config.max_sh_per_se = 1; 1909 adev->gfx.config.max_backends_per_se = 2; 1910 1911 switch (adev->pdev->revision) { 1912 case 0xc4: 1913 case 0x84: 1914 case 0xc8: 1915 case 0xcc: 1916 case 0xe1: 1917 case 0xe3: 1918 /* B10 */ 1919 adev->gfx.config.max_cu_per_sh = 8; 1920 break; 1921 case 0xc5: 1922 case 0x81: 1923 case 0x85: 1924 case 0xc9: 1925 case 0xcd: 1926 case 0xe2: 1927 case 0xe4: 1928 /* B8 */ 1929 adev->gfx.config.max_cu_per_sh = 6; 1930 break; 1931 case 0xc6: 1932 case 0xca: 1933 case 0xce: 1934 case 0x88: 1935 case 0xe6: 1936 /* B6 */ 1937 adev->gfx.config.max_cu_per_sh = 6; 1938 break; 1939 case 0xc7: 1940 case 0x87: 1941 case 0xcb: 1942 case 0xe5: 1943 case 0x89: 1944 default: 1945 /* B4 */ 1946 adev->gfx.config.max_cu_per_sh = 4; 1947 break; 1948 } 1949 1950 adev->gfx.config.max_texture_channel_caches = 2; 1951 adev->gfx.config.max_gprs = 256; 1952 adev->gfx.config.max_gs_threads = 32; 1953 adev->gfx.config.max_hw_contexts = 8; 1954 1955 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1956 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1957 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1958 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1959 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1960 break; 1961 case CHIP_STONEY: 1962 adev->gfx.config.max_shader_engines = 1; 1963 adev->gfx.config.max_tile_pipes = 2; 1964 adev->gfx.config.max_sh_per_se = 1; 1965 adev->gfx.config.max_backends_per_se = 1; 1966 1967 switch (adev->pdev->revision) { 1968 case 0x80: 1969 case 0x81: 1970 case 0xc0: 1971 case 0xc1: 1972 case 0xc2: 1973 case 0xc4: 1974 case 0xc8: 1975 case 0xc9: 1976 case 0xd6: 1977 case 0xda: 1978 case 0xe9: 1979 case 0xea: 1980 adev->gfx.config.max_cu_per_sh = 3; 1981 break; 1982 case 0x83: 1983 case 0xd0: 1984 case 0xd1: 1985 case 0xd2: 1986 case 0xd4: 1987 case 0xdb: 1988 case 0xe1: 1989 case 0xe2: 1990 default: 1991 adev->gfx.config.max_cu_per_sh = 2; 1992 break; 1993 } 1994 1995 adev->gfx.config.max_texture_channel_caches = 2; 1996 adev->gfx.config.max_gprs = 256; 1997 adev->gfx.config.max_gs_threads = 16; 1998 adev->gfx.config.max_hw_contexts = 8; 1999 2000 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2001 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2002 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2003 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 2004 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 2005 break; 2006 default: 2007 adev->gfx.config.max_shader_engines = 2; 2008 adev->gfx.config.max_tile_pipes = 4; 2009 adev->gfx.config.max_cu_per_sh = 2; 2010 adev->gfx.config.max_sh_per_se = 1; 2011 adev->gfx.config.max_backends_per_se = 2; 2012 adev->gfx.config.max_texture_channel_caches = 4; 2013 adev->gfx.config.max_gprs = 256; 2014 adev->gfx.config.max_gs_threads = 32; 2015 adev->gfx.config.max_hw_contexts = 8; 2016 2017 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2018 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2019 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2020 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 2021 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 2022 break; 2023 } 2024 2025 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); 2026 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); 2027 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; 2028 2029 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; 2030 adev->gfx.config.mem_max_burst_length_bytes = 256; 2031 if (adev->flags & AMD_IS_APU) { 2032 /* Get memory bank mapping mode. */ 2033 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING); 2034 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 2035 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 2036 2037 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING); 2038 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 2039 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 2040 2041 /* Validate settings in case only one DIMM installed. */ 2042 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12)) 2043 dimm00_addr_map = 0; 2044 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12)) 2045 dimm01_addr_map = 0; 2046 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12)) 2047 dimm10_addr_map = 0; 2048 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12)) 2049 dimm11_addr_map = 0; 2050 2051 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */ 2052 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */ 2053 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) 2054 adev->gfx.config.mem_row_size_in_kb = 2; 2055 else 2056 adev->gfx.config.mem_row_size_in_kb = 1; 2057 } else { 2058 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS); 2059 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 2060 if (adev->gfx.config.mem_row_size_in_kb > 4) 2061 adev->gfx.config.mem_row_size_in_kb = 4; 2062 } 2063 2064 adev->gfx.config.shader_engine_tile_size = 32; 2065 adev->gfx.config.num_gpus = 1; 2066 adev->gfx.config.multi_gpu_tile_size = 64; 2067 2068 /* fix up row size */ 2069 switch (adev->gfx.config.mem_row_size_in_kb) { 2070 case 1: 2071 default: 2072 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0); 2073 break; 2074 case 2: 2075 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1); 2076 break; 2077 case 4: 2078 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2); 2079 break; 2080 } 2081 adev->gfx.config.gb_addr_config = gb_addr_config; 2082 2083 return 0; 2084 } 2085 2086 static int gfx_v8_0_sw_init(void *handle) 2087 { 2088 int i, r; 2089 struct amdgpu_ring *ring; 2090 struct amdgpu_kiq *kiq; 2091 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2092 2093 /* KIQ event */ 2094 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq); 2095 if (r) 2096 return r; 2097 2098 /* EOP Event */ 2099 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); 2100 if (r) 2101 return r; 2102 2103 /* Privileged reg */ 2104 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, 2105 &adev->gfx.priv_reg_irq); 2106 if (r) 2107 return r; 2108 2109 /* Privileged inst */ 2110 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, 2111 &adev->gfx.priv_inst_irq); 2112 if (r) 2113 return r; 2114 2115 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2116 2117 gfx_v8_0_scratch_init(adev); 2118 2119 r = gfx_v8_0_init_microcode(adev); 2120 if (r) { 2121 DRM_ERROR("Failed to load gfx firmware!\n"); 2122 return r; 2123 } 2124 2125 r = gfx_v8_0_rlc_init(adev); 2126 if (r) { 2127 DRM_ERROR("Failed to init rlc BOs!\n"); 2128 return r; 2129 } 2130 2131 r = gfx_v8_0_mec_init(adev); 2132 if (r) { 2133 DRM_ERROR("Failed to init MEC BOs!\n"); 2134 return r; 2135 } 2136 2137 /* set up the gfx ring */ 2138 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2139 ring = &adev->gfx.gfx_ring[i]; 2140 ring->ring_obj = NULL; 2141 sprintf(ring->name, "gfx"); 2142 /* no gfx doorbells on iceland */ 2143 if (adev->asic_type != CHIP_TOPAZ) { 2144 ring->use_doorbell = true; 2145 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; 2146 } 2147 2148 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2149 AMDGPU_CP_IRQ_GFX_EOP); 2150 if (r) 2151 return r; 2152 } 2153 2154 /* set up the compute queues */ 2155 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2156 unsigned irq_type; 2157 2158 /* max 32 queues per MEC */ 2159 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { 2160 DRM_ERROR("Too many (%d) compute rings!\n", i); 2161 break; 2162 } 2163 ring = &adev->gfx.compute_ring[i]; 2164 ring->ring_obj = NULL; 2165 ring->use_doorbell = true; 2166 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; 2167 ring->me = 1; /* first MEC */ 2168 ring->pipe = i / 8; 2169 ring->queue = i % 8; 2170 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); 2171 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2172 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; 2173 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2174 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2175 irq_type); 2176 if (r) 2177 return r; 2178 } 2179 2180 if (amdgpu_sriov_vf(adev)) { 2181 r = gfx_v8_0_kiq_init(adev); 2182 if (r) { 2183 DRM_ERROR("Failed to init KIQ BOs!\n"); 2184 return r; 2185 } 2186 2187 kiq = &adev->gfx.kiq; 2188 r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2189 if (r) 2190 return r; 2191 2192 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 2193 r = gfx_v8_0_compute_mqd_sw_init(adev); 2194 if (r) 2195 return r; 2196 } 2197 2198 /* reserve GDS, GWS and OA resource for gfx */ 2199 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, 2200 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, 2201 &adev->gds.gds_gfx_bo, NULL, NULL); 2202 if (r) 2203 return r; 2204 2205 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, 2206 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, 2207 &adev->gds.gws_gfx_bo, NULL, NULL); 2208 if (r) 2209 return r; 2210 2211 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, 2212 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, 2213 &adev->gds.oa_gfx_bo, NULL, NULL); 2214 if (r) 2215 return r; 2216 2217 adev->gfx.ce_ram_size = 0x8000; 2218 2219 r = gfx_v8_0_gpu_early_init(adev); 2220 if (r) 2221 return r; 2222 2223 return 0; 2224 } 2225 2226 static int gfx_v8_0_sw_fini(void *handle) 2227 { 2228 int i; 2229 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2230 2231 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); 2232 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); 2233 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); 2234 2235 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2236 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2237 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2238 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2239 2240 if (amdgpu_sriov_vf(adev)) { 2241 gfx_v8_0_compute_mqd_sw_fini(adev); 2242 gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 2243 gfx_v8_0_kiq_fini(adev); 2244 } 2245 2246 gfx_v8_0_mec_fini(adev); 2247 gfx_v8_0_rlc_fini(adev); 2248 gfx_v8_0_free_microcode(adev); 2249 2250 return 0; 2251 } 2252 2253 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) 2254 { 2255 uint32_t *modearray, *mod2array; 2256 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); 2257 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 2258 u32 reg_offset; 2259 2260 modearray = adev->gfx.config.tile_mode_array; 2261 mod2array = adev->gfx.config.macrotile_mode_array; 2262 2263 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2264 modearray[reg_offset] = 0; 2265 2266 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2267 mod2array[reg_offset] = 0; 2268 2269 switch (adev->asic_type) { 2270 case CHIP_TOPAZ: 2271 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2272 PIPE_CONFIG(ADDR_SURF_P2) | 2273 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2274 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2275 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2276 PIPE_CONFIG(ADDR_SURF_P2) | 2277 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2278 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2279 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2280 PIPE_CONFIG(ADDR_SURF_P2) | 2281 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2282 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2283 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2284 PIPE_CONFIG(ADDR_SURF_P2) | 2285 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2286 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2287 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2288 PIPE_CONFIG(ADDR_SURF_P2) | 2289 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2290 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2291 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2292 PIPE_CONFIG(ADDR_SURF_P2) | 2293 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2294 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2295 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2296 PIPE_CONFIG(ADDR_SURF_P2) | 2297 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2298 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2299 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2300 PIPE_CONFIG(ADDR_SURF_P2)); 2301 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2302 PIPE_CONFIG(ADDR_SURF_P2) | 2303 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2304 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2305 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2306 PIPE_CONFIG(ADDR_SURF_P2) | 2307 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2308 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2309 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2310 PIPE_CONFIG(ADDR_SURF_P2) | 2311 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2313 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2314 PIPE_CONFIG(ADDR_SURF_P2) | 2315 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2317 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2318 PIPE_CONFIG(ADDR_SURF_P2) | 2319 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2321 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2322 PIPE_CONFIG(ADDR_SURF_P2) | 2323 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2325 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2326 PIPE_CONFIG(ADDR_SURF_P2) | 2327 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2329 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2330 PIPE_CONFIG(ADDR_SURF_P2) | 2331 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2333 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2334 PIPE_CONFIG(ADDR_SURF_P2) | 2335 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2336 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2337 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2338 PIPE_CONFIG(ADDR_SURF_P2) | 2339 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2341 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2342 PIPE_CONFIG(ADDR_SURF_P2) | 2343 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2345 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2346 PIPE_CONFIG(ADDR_SURF_P2) | 2347 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2349 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2350 PIPE_CONFIG(ADDR_SURF_P2) | 2351 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2353 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2354 PIPE_CONFIG(ADDR_SURF_P2) | 2355 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2357 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2358 PIPE_CONFIG(ADDR_SURF_P2) | 2359 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2361 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2362 PIPE_CONFIG(ADDR_SURF_P2) | 2363 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2365 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2366 PIPE_CONFIG(ADDR_SURF_P2) | 2367 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2369 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2370 PIPE_CONFIG(ADDR_SURF_P2) | 2371 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2373 2374 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2375 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2376 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2377 NUM_BANKS(ADDR_SURF_8_BANK)); 2378 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2379 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2380 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2381 NUM_BANKS(ADDR_SURF_8_BANK)); 2382 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2383 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2384 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2385 NUM_BANKS(ADDR_SURF_8_BANK)); 2386 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2387 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2388 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2389 NUM_BANKS(ADDR_SURF_8_BANK)); 2390 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2391 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2392 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2393 NUM_BANKS(ADDR_SURF_8_BANK)); 2394 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2395 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2396 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2397 NUM_BANKS(ADDR_SURF_8_BANK)); 2398 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2399 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2400 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2401 NUM_BANKS(ADDR_SURF_8_BANK)); 2402 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2403 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2404 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2405 NUM_BANKS(ADDR_SURF_16_BANK)); 2406 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2407 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2408 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2409 NUM_BANKS(ADDR_SURF_16_BANK)); 2410 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2413 NUM_BANKS(ADDR_SURF_16_BANK)); 2414 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2417 NUM_BANKS(ADDR_SURF_16_BANK)); 2418 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2419 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2420 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2421 NUM_BANKS(ADDR_SURF_16_BANK)); 2422 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2423 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2424 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2425 NUM_BANKS(ADDR_SURF_16_BANK)); 2426 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2427 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2428 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2429 NUM_BANKS(ADDR_SURF_8_BANK)); 2430 2431 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2432 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2433 reg_offset != 23) 2434 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2435 2436 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2437 if (reg_offset != 7) 2438 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2439 2440 break; 2441 case CHIP_FIJI: 2442 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2443 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2444 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2446 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2447 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2448 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2449 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2450 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2451 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2452 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2453 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2454 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2455 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2456 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2457 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2458 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2459 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2460 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2461 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2462 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2463 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2464 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2465 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2466 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2467 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2468 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2469 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2470 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2471 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2472 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2473 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2474 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2475 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2476 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2477 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2478 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2480 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2481 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2482 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2484 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2485 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2486 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2488 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2489 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2490 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2492 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2493 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2494 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2496 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2497 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2498 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2500 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2501 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2502 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2503 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2504 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2505 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2506 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2507 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2508 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2509 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2510 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2511 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2512 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2513 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2514 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2515 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2516 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2517 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2518 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2519 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2520 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2521 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2522 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2523 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2524 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2525 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2526 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2527 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2528 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2529 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2530 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2531 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2532 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2533 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2534 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2535 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2536 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2537 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2538 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2539 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2540 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2541 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2542 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2543 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2544 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2545 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2546 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2548 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2549 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2550 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2552 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2553 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2554 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2555 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2556 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2557 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2558 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2559 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2560 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2561 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2562 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2563 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2564 2565 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2566 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2567 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2568 NUM_BANKS(ADDR_SURF_8_BANK)); 2569 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2570 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2571 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2572 NUM_BANKS(ADDR_SURF_8_BANK)); 2573 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2574 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2575 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2576 NUM_BANKS(ADDR_SURF_8_BANK)); 2577 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2578 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2579 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2580 NUM_BANKS(ADDR_SURF_8_BANK)); 2581 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2582 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2583 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2584 NUM_BANKS(ADDR_SURF_8_BANK)); 2585 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2586 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2587 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2588 NUM_BANKS(ADDR_SURF_8_BANK)); 2589 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2590 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2591 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2592 NUM_BANKS(ADDR_SURF_8_BANK)); 2593 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2594 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2595 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2596 NUM_BANKS(ADDR_SURF_8_BANK)); 2597 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2598 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2599 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2600 NUM_BANKS(ADDR_SURF_8_BANK)); 2601 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2602 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2603 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2604 NUM_BANKS(ADDR_SURF_8_BANK)); 2605 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2606 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2607 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2608 NUM_BANKS(ADDR_SURF_8_BANK)); 2609 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2610 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2611 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2612 NUM_BANKS(ADDR_SURF_8_BANK)); 2613 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2614 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2615 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2616 NUM_BANKS(ADDR_SURF_8_BANK)); 2617 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2618 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2619 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2620 NUM_BANKS(ADDR_SURF_4_BANK)); 2621 2622 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2623 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2624 2625 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2626 if (reg_offset != 7) 2627 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2628 2629 break; 2630 case CHIP_TONGA: 2631 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2632 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2633 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2634 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2635 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2636 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2637 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2638 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2639 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2640 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2641 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2642 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2643 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2644 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2645 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2646 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2647 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2648 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2649 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2650 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2651 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2652 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2653 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2654 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2655 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2656 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2657 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2658 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2659 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2660 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2661 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2662 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2663 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2664 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2665 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2666 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2667 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2668 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2669 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2670 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2671 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2672 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2673 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2674 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2675 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2676 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2677 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2678 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2679 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2680 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2681 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2682 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2683 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2684 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2685 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2686 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2687 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2688 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2689 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2690 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2691 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2692 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2693 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2694 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2695 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2696 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2697 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2698 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2699 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2700 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2701 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2702 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2703 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2704 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2705 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2706 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2707 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2708 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2709 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2710 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2711 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2712 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2713 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2714 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2715 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2716 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2717 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2718 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2719 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2720 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2721 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2722 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2723 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2725 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2726 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2727 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2728 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2729 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2730 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2731 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2732 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2733 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2734 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2735 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2736 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2737 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2738 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2739 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2740 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2741 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2742 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2743 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2744 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2745 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2746 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2747 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2748 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2749 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2750 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2751 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2752 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2753 2754 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2755 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2756 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2757 NUM_BANKS(ADDR_SURF_16_BANK)); 2758 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2759 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2760 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2761 NUM_BANKS(ADDR_SURF_16_BANK)); 2762 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2763 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2764 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2765 NUM_BANKS(ADDR_SURF_16_BANK)); 2766 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2767 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2768 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2769 NUM_BANKS(ADDR_SURF_16_BANK)); 2770 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2771 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2772 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2773 NUM_BANKS(ADDR_SURF_16_BANK)); 2774 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2775 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2776 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2777 NUM_BANKS(ADDR_SURF_16_BANK)); 2778 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2779 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2780 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2781 NUM_BANKS(ADDR_SURF_16_BANK)); 2782 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2783 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2784 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2785 NUM_BANKS(ADDR_SURF_16_BANK)); 2786 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2787 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2788 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2789 NUM_BANKS(ADDR_SURF_16_BANK)); 2790 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2791 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2792 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2793 NUM_BANKS(ADDR_SURF_16_BANK)); 2794 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2795 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2796 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2797 NUM_BANKS(ADDR_SURF_16_BANK)); 2798 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2799 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2800 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2801 NUM_BANKS(ADDR_SURF_8_BANK)); 2802 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2803 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2804 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2805 NUM_BANKS(ADDR_SURF_4_BANK)); 2806 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2807 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2808 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2809 NUM_BANKS(ADDR_SURF_4_BANK)); 2810 2811 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2812 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2813 2814 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2815 if (reg_offset != 7) 2816 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2817 2818 break; 2819 case CHIP_POLARIS11: 2820 case CHIP_POLARIS12: 2821 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2822 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2823 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2824 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2825 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2826 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2827 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2828 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2829 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2830 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2831 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2832 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2833 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2834 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2835 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2836 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2837 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2838 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2839 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2840 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2841 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2842 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2843 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2844 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2845 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2846 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2847 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2848 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2849 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2850 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2851 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2852 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2853 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2854 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2855 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2856 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2857 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2859 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2860 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2861 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2863 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2864 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2865 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2866 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2867 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2868 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2869 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2870 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2871 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2872 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2873 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2875 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2876 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2877 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2879 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2880 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2881 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2882 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2883 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2884 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2885 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2886 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2887 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2888 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2889 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2890 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2891 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2892 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2893 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2894 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2895 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2896 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2897 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2898 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2899 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2900 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2901 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2902 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2903 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2904 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2905 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2906 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2907 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2908 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2909 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2910 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2911 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2912 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2913 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2914 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2915 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2916 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2917 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2918 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2919 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2920 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2921 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2922 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2923 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2924 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2925 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2926 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2927 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2928 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2929 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2930 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2931 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2932 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2933 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2934 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2935 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2936 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2937 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2938 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2939 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2940 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2941 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2942 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2943 2944 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2945 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2946 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2947 NUM_BANKS(ADDR_SURF_16_BANK)); 2948 2949 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2950 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2951 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2952 NUM_BANKS(ADDR_SURF_16_BANK)); 2953 2954 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2955 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2956 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2957 NUM_BANKS(ADDR_SURF_16_BANK)); 2958 2959 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2960 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2961 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2962 NUM_BANKS(ADDR_SURF_16_BANK)); 2963 2964 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2965 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2966 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2967 NUM_BANKS(ADDR_SURF_16_BANK)); 2968 2969 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2970 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2971 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2972 NUM_BANKS(ADDR_SURF_16_BANK)); 2973 2974 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2975 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2976 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2977 NUM_BANKS(ADDR_SURF_16_BANK)); 2978 2979 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2980 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2981 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2982 NUM_BANKS(ADDR_SURF_16_BANK)); 2983 2984 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2985 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2986 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2987 NUM_BANKS(ADDR_SURF_16_BANK)); 2988 2989 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2990 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2991 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2992 NUM_BANKS(ADDR_SURF_16_BANK)); 2993 2994 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2995 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2996 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2997 NUM_BANKS(ADDR_SURF_16_BANK)); 2998 2999 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3000 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3001 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3002 NUM_BANKS(ADDR_SURF_16_BANK)); 3003 3004 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3005 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3006 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3007 NUM_BANKS(ADDR_SURF_8_BANK)); 3008 3009 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3010 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3011 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3012 NUM_BANKS(ADDR_SURF_4_BANK)); 3013 3014 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3015 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3016 3017 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3018 if (reg_offset != 7) 3019 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3020 3021 break; 3022 case CHIP_POLARIS10: 3023 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3024 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3025 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3026 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3027 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3028 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3029 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3030 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3031 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3033 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3034 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3035 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3036 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3037 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3038 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3039 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3040 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3041 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3042 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3043 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3044 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3045 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3046 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3047 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3048 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3049 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3050 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3051 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3052 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3053 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3054 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3055 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3056 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 3057 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3058 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3059 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3060 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3061 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3062 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3063 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3064 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3065 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3066 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3067 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3068 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3069 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3070 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3071 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3073 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3074 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3075 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3077 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3078 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3079 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3081 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3082 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3083 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3085 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3086 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3087 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3089 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3090 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3091 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3093 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3094 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3095 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3096 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3097 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3098 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3099 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3100 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3101 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3102 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3103 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3105 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3106 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3107 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3108 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3109 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3110 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3111 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3112 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3113 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3114 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3115 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3117 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3118 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3119 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3120 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3121 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3122 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3123 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3124 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3125 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3126 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3127 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3128 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3129 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3130 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3131 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3132 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3133 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3134 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3135 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3136 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3137 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3138 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3139 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3141 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3142 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3143 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3144 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3145 3146 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3147 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3148 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3149 NUM_BANKS(ADDR_SURF_16_BANK)); 3150 3151 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3152 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3153 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3154 NUM_BANKS(ADDR_SURF_16_BANK)); 3155 3156 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3157 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3158 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3159 NUM_BANKS(ADDR_SURF_16_BANK)); 3160 3161 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3162 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3163 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3164 NUM_BANKS(ADDR_SURF_16_BANK)); 3165 3166 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3167 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3168 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3169 NUM_BANKS(ADDR_SURF_16_BANK)); 3170 3171 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3172 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3173 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3174 NUM_BANKS(ADDR_SURF_16_BANK)); 3175 3176 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3177 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3178 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3179 NUM_BANKS(ADDR_SURF_16_BANK)); 3180 3181 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3182 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3183 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3184 NUM_BANKS(ADDR_SURF_16_BANK)); 3185 3186 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3187 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3188 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3189 NUM_BANKS(ADDR_SURF_16_BANK)); 3190 3191 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3194 NUM_BANKS(ADDR_SURF_16_BANK)); 3195 3196 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3197 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3198 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3199 NUM_BANKS(ADDR_SURF_16_BANK)); 3200 3201 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3202 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3203 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3204 NUM_BANKS(ADDR_SURF_8_BANK)); 3205 3206 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3209 NUM_BANKS(ADDR_SURF_4_BANK)); 3210 3211 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3212 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3213 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3214 NUM_BANKS(ADDR_SURF_4_BANK)); 3215 3216 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3217 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3218 3219 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3220 if (reg_offset != 7) 3221 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3222 3223 break; 3224 case CHIP_STONEY: 3225 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3226 PIPE_CONFIG(ADDR_SURF_P2) | 3227 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3228 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3229 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3230 PIPE_CONFIG(ADDR_SURF_P2) | 3231 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3232 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3233 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3234 PIPE_CONFIG(ADDR_SURF_P2) | 3235 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3236 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3237 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3238 PIPE_CONFIG(ADDR_SURF_P2) | 3239 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3240 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3241 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3242 PIPE_CONFIG(ADDR_SURF_P2) | 3243 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3244 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3245 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3246 PIPE_CONFIG(ADDR_SURF_P2) | 3247 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3248 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3249 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3250 PIPE_CONFIG(ADDR_SURF_P2) | 3251 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3252 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3253 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3254 PIPE_CONFIG(ADDR_SURF_P2)); 3255 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3256 PIPE_CONFIG(ADDR_SURF_P2) | 3257 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3259 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3260 PIPE_CONFIG(ADDR_SURF_P2) | 3261 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3263 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3264 PIPE_CONFIG(ADDR_SURF_P2) | 3265 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3267 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3268 PIPE_CONFIG(ADDR_SURF_P2) | 3269 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3271 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3272 PIPE_CONFIG(ADDR_SURF_P2) | 3273 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3274 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3275 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3276 PIPE_CONFIG(ADDR_SURF_P2) | 3277 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3279 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3280 PIPE_CONFIG(ADDR_SURF_P2) | 3281 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3283 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3284 PIPE_CONFIG(ADDR_SURF_P2) | 3285 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3287 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3288 PIPE_CONFIG(ADDR_SURF_P2) | 3289 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3291 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3292 PIPE_CONFIG(ADDR_SURF_P2) | 3293 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3295 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3296 PIPE_CONFIG(ADDR_SURF_P2) | 3297 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3299 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3300 PIPE_CONFIG(ADDR_SURF_P2) | 3301 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3303 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3304 PIPE_CONFIG(ADDR_SURF_P2) | 3305 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3307 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3308 PIPE_CONFIG(ADDR_SURF_P2) | 3309 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3311 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3312 PIPE_CONFIG(ADDR_SURF_P2) | 3313 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3315 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3316 PIPE_CONFIG(ADDR_SURF_P2) | 3317 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3319 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3320 PIPE_CONFIG(ADDR_SURF_P2) | 3321 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3323 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3324 PIPE_CONFIG(ADDR_SURF_P2) | 3325 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3326 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3327 3328 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3329 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3330 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3331 NUM_BANKS(ADDR_SURF_8_BANK)); 3332 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3333 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3334 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3335 NUM_BANKS(ADDR_SURF_8_BANK)); 3336 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3337 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3338 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3339 NUM_BANKS(ADDR_SURF_8_BANK)); 3340 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3341 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3342 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3343 NUM_BANKS(ADDR_SURF_8_BANK)); 3344 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3345 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3346 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3347 NUM_BANKS(ADDR_SURF_8_BANK)); 3348 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3349 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3350 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3351 NUM_BANKS(ADDR_SURF_8_BANK)); 3352 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3353 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3354 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3355 NUM_BANKS(ADDR_SURF_8_BANK)); 3356 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3357 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3358 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3359 NUM_BANKS(ADDR_SURF_16_BANK)); 3360 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3361 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3362 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3363 NUM_BANKS(ADDR_SURF_16_BANK)); 3364 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3365 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3366 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3367 NUM_BANKS(ADDR_SURF_16_BANK)); 3368 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3371 NUM_BANKS(ADDR_SURF_16_BANK)); 3372 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3373 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3374 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3375 NUM_BANKS(ADDR_SURF_16_BANK)); 3376 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3377 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3378 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3379 NUM_BANKS(ADDR_SURF_16_BANK)); 3380 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3381 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3382 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3383 NUM_BANKS(ADDR_SURF_8_BANK)); 3384 3385 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3386 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3387 reg_offset != 23) 3388 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3389 3390 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3391 if (reg_offset != 7) 3392 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3393 3394 break; 3395 default: 3396 dev_warn(adev->dev, 3397 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3398 adev->asic_type); 3399 3400 case CHIP_CARRIZO: 3401 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3402 PIPE_CONFIG(ADDR_SURF_P2) | 3403 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3404 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3405 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3406 PIPE_CONFIG(ADDR_SURF_P2) | 3407 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3408 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3409 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3410 PIPE_CONFIG(ADDR_SURF_P2) | 3411 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3412 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3413 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3414 PIPE_CONFIG(ADDR_SURF_P2) | 3415 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3416 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3417 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3418 PIPE_CONFIG(ADDR_SURF_P2) | 3419 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3420 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3421 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3422 PIPE_CONFIG(ADDR_SURF_P2) | 3423 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3424 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3425 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3426 PIPE_CONFIG(ADDR_SURF_P2) | 3427 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3428 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3429 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3430 PIPE_CONFIG(ADDR_SURF_P2)); 3431 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3432 PIPE_CONFIG(ADDR_SURF_P2) | 3433 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3435 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3436 PIPE_CONFIG(ADDR_SURF_P2) | 3437 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3439 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3440 PIPE_CONFIG(ADDR_SURF_P2) | 3441 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3443 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3444 PIPE_CONFIG(ADDR_SURF_P2) | 3445 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3446 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3447 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3448 PIPE_CONFIG(ADDR_SURF_P2) | 3449 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3450 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3451 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3452 PIPE_CONFIG(ADDR_SURF_P2) | 3453 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3455 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3456 PIPE_CONFIG(ADDR_SURF_P2) | 3457 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3459 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3460 PIPE_CONFIG(ADDR_SURF_P2) | 3461 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3462 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3463 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3464 PIPE_CONFIG(ADDR_SURF_P2) | 3465 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3467 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3468 PIPE_CONFIG(ADDR_SURF_P2) | 3469 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3470 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3471 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3472 PIPE_CONFIG(ADDR_SURF_P2) | 3473 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3474 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3475 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3476 PIPE_CONFIG(ADDR_SURF_P2) | 3477 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3478 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3479 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3480 PIPE_CONFIG(ADDR_SURF_P2) | 3481 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3483 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3484 PIPE_CONFIG(ADDR_SURF_P2) | 3485 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3487 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3488 PIPE_CONFIG(ADDR_SURF_P2) | 3489 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3491 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3492 PIPE_CONFIG(ADDR_SURF_P2) | 3493 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3494 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3495 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3496 PIPE_CONFIG(ADDR_SURF_P2) | 3497 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3498 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3499 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3500 PIPE_CONFIG(ADDR_SURF_P2) | 3501 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3502 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3503 3504 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3505 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3506 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3507 NUM_BANKS(ADDR_SURF_8_BANK)); 3508 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3511 NUM_BANKS(ADDR_SURF_8_BANK)); 3512 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3513 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3514 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3515 NUM_BANKS(ADDR_SURF_8_BANK)); 3516 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3517 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3518 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3519 NUM_BANKS(ADDR_SURF_8_BANK)); 3520 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3523 NUM_BANKS(ADDR_SURF_8_BANK)); 3524 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3525 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3526 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3527 NUM_BANKS(ADDR_SURF_8_BANK)); 3528 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3531 NUM_BANKS(ADDR_SURF_8_BANK)); 3532 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3535 NUM_BANKS(ADDR_SURF_16_BANK)); 3536 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3537 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3538 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3539 NUM_BANKS(ADDR_SURF_16_BANK)); 3540 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3541 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3542 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3543 NUM_BANKS(ADDR_SURF_16_BANK)); 3544 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3545 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3546 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3547 NUM_BANKS(ADDR_SURF_16_BANK)); 3548 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3549 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3550 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3551 NUM_BANKS(ADDR_SURF_16_BANK)); 3552 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3553 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3554 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3555 NUM_BANKS(ADDR_SURF_16_BANK)); 3556 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3557 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3558 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3559 NUM_BANKS(ADDR_SURF_8_BANK)); 3560 3561 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3562 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3563 reg_offset != 23) 3564 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3565 3566 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3567 if (reg_offset != 7) 3568 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3569 3570 break; 3571 } 3572 } 3573 3574 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, 3575 u32 se_num, u32 sh_num, u32 instance) 3576 { 3577 u32 data; 3578 3579 if (instance == 0xffffffff) 3580 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3581 else 3582 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 3583 3584 if (se_num == 0xffffffff) 3585 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3586 else 3587 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3588 3589 if (sh_num == 0xffffffff) 3590 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3591 else 3592 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3593 3594 WREG32(mmGRBM_GFX_INDEX, data); 3595 } 3596 3597 static u32 gfx_v8_0_create_bitmask(u32 bit_width) 3598 { 3599 return (u32)((1ULL << bit_width) - 1); 3600 } 3601 3602 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3603 { 3604 u32 data, mask; 3605 3606 data = RREG32(mmCC_RB_BACKEND_DISABLE) | 3607 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3608 3609 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); 3610 3611 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / 3612 adev->gfx.config.max_sh_per_se); 3613 3614 return (~data) & mask; 3615 } 3616 3617 static void 3618 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) 3619 { 3620 switch (adev->asic_type) { 3621 case CHIP_FIJI: 3622 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3623 RB_XSEL2(1) | PKR_MAP(2) | 3624 PKR_XSEL(1) | PKR_YSEL(1) | 3625 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); 3626 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | 3627 SE_PAIR_YSEL(2); 3628 break; 3629 case CHIP_TONGA: 3630 case CHIP_POLARIS10: 3631 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3632 SE_XSEL(1) | SE_YSEL(1); 3633 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | 3634 SE_PAIR_YSEL(2); 3635 break; 3636 case CHIP_TOPAZ: 3637 case CHIP_CARRIZO: 3638 *rconf |= RB_MAP_PKR0(2); 3639 *rconf1 |= 0x0; 3640 break; 3641 case CHIP_POLARIS11: 3642 case CHIP_POLARIS12: 3643 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3644 SE_XSEL(1) | SE_YSEL(1); 3645 *rconf1 |= 0x0; 3646 break; 3647 case CHIP_STONEY: 3648 *rconf |= 0x0; 3649 *rconf1 |= 0x0; 3650 break; 3651 default: 3652 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 3653 break; 3654 } 3655 } 3656 3657 static void 3658 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, 3659 u32 raster_config, u32 raster_config_1, 3660 unsigned rb_mask, unsigned num_rb) 3661 { 3662 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); 3663 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); 3664 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); 3665 unsigned rb_per_se = num_rb / num_se; 3666 unsigned se_mask[4]; 3667 unsigned se; 3668 3669 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3670 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3671 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 3672 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3673 3674 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); 3675 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); 3676 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); 3677 3678 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3679 (!se_mask[2] && !se_mask[3]))) { 3680 raster_config_1 &= ~SE_PAIR_MAP_MASK; 3681 3682 if (!se_mask[0] && !se_mask[1]) { 3683 raster_config_1 |= 3684 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); 3685 } else { 3686 raster_config_1 |= 3687 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); 3688 } 3689 } 3690 3691 for (se = 0; se < num_se; se++) { 3692 unsigned raster_config_se = raster_config; 3693 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3694 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3695 int idx = (se / 2) * 2; 3696 3697 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3698 raster_config_se &= ~SE_MAP_MASK; 3699 3700 if (!se_mask[idx]) { 3701 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); 3702 } else { 3703 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); 3704 } 3705 } 3706 3707 pkr0_mask &= rb_mask; 3708 pkr1_mask &= rb_mask; 3709 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3710 raster_config_se &= ~PKR_MAP_MASK; 3711 3712 if (!pkr0_mask) { 3713 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); 3714 } else { 3715 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); 3716 } 3717 } 3718 3719 if (rb_per_se >= 2) { 3720 unsigned rb0_mask = 1 << (se * rb_per_se); 3721 unsigned rb1_mask = rb0_mask << 1; 3722 3723 rb0_mask &= rb_mask; 3724 rb1_mask &= rb_mask; 3725 if (!rb0_mask || !rb1_mask) { 3726 raster_config_se &= ~RB_MAP_PKR0_MASK; 3727 3728 if (!rb0_mask) { 3729 raster_config_se |= 3730 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); 3731 } else { 3732 raster_config_se |= 3733 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); 3734 } 3735 } 3736 3737 if (rb_per_se > 2) { 3738 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3739 rb1_mask = rb0_mask << 1; 3740 rb0_mask &= rb_mask; 3741 rb1_mask &= rb_mask; 3742 if (!rb0_mask || !rb1_mask) { 3743 raster_config_se &= ~RB_MAP_PKR1_MASK; 3744 3745 if (!rb0_mask) { 3746 raster_config_se |= 3747 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); 3748 } else { 3749 raster_config_se |= 3750 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0); 3751 } 3752 } 3753 } 3754 } 3755 3756 /* GRBM_GFX_INDEX has a different offset on VI */ 3757 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); 3758 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); 3759 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3760 } 3761 3762 /* GRBM_GFX_INDEX has a different offset on VI */ 3763 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3764 } 3765 3766 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) 3767 { 3768 int i, j; 3769 u32 data; 3770 u32 raster_config = 0, raster_config_1 = 0; 3771 u32 active_rbs = 0; 3772 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 3773 adev->gfx.config.max_sh_per_se; 3774 unsigned num_rb_pipes; 3775 3776 mutex_lock(&adev->grbm_idx_mutex); 3777 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3778 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3779 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3780 data = gfx_v8_0_get_rb_active_bitmap(adev); 3781 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 3782 rb_bitmap_width_per_sh); 3783 } 3784 } 3785 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3786 3787 adev->gfx.config.backend_enable_mask = active_rbs; 3788 adev->gfx.config.num_rbs = hweight32(active_rbs); 3789 3790 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se * 3791 adev->gfx.config.max_shader_engines, 16); 3792 3793 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1); 3794 3795 if (!adev->gfx.config.backend_enable_mask || 3796 adev->gfx.config.num_rbs >= num_rb_pipes) { 3797 WREG32(mmPA_SC_RASTER_CONFIG, raster_config); 3798 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3799 } else { 3800 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1, 3801 adev->gfx.config.backend_enable_mask, 3802 num_rb_pipes); 3803 } 3804 3805 /* cache the values for userspace */ 3806 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3807 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3808 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3809 adev->gfx.config.rb_config[i][j].rb_backend_disable = 3810 RREG32(mmCC_RB_BACKEND_DISABLE); 3811 adev->gfx.config.rb_config[i][j].user_rb_backend_disable = 3812 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3813 adev->gfx.config.rb_config[i][j].raster_config = 3814 RREG32(mmPA_SC_RASTER_CONFIG); 3815 adev->gfx.config.rb_config[i][j].raster_config_1 = 3816 RREG32(mmPA_SC_RASTER_CONFIG_1); 3817 } 3818 } 3819 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3820 mutex_unlock(&adev->grbm_idx_mutex); 3821 } 3822 3823 /** 3824 * gfx_v8_0_init_compute_vmid - gart enable 3825 * 3826 * @rdev: amdgpu_device pointer 3827 * 3828 * Initialize compute vmid sh_mem registers 3829 * 3830 */ 3831 #define DEFAULT_SH_MEM_BASES (0x6000) 3832 #define FIRST_COMPUTE_VMID (8) 3833 #define LAST_COMPUTE_VMID (16) 3834 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) 3835 { 3836 int i; 3837 uint32_t sh_mem_config; 3838 uint32_t sh_mem_bases; 3839 3840 /* 3841 * Configure apertures: 3842 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 3843 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 3844 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 3845 */ 3846 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 3847 3848 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 << 3849 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT | 3850 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 3851 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | 3852 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | 3853 SH_MEM_CONFIG__PRIVATE_ATC_MASK; 3854 3855 mutex_lock(&adev->srbm_mutex); 3856 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 3857 vi_srbm_select(adev, 0, 0, 0, i); 3858 /* CP and shaders */ 3859 WREG32(mmSH_MEM_CONFIG, sh_mem_config); 3860 WREG32(mmSH_MEM_APE1_BASE, 1); 3861 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3862 WREG32(mmSH_MEM_BASES, sh_mem_bases); 3863 } 3864 vi_srbm_select(adev, 0, 0, 0, 0); 3865 mutex_unlock(&adev->srbm_mutex); 3866 } 3867 3868 static void gfx_v8_0_config_init(struct amdgpu_device *adev) 3869 { 3870 switch (adev->asic_type) { 3871 default: 3872 adev->gfx.config.double_offchip_lds_buf = 1; 3873 break; 3874 case CHIP_CARRIZO: 3875 case CHIP_STONEY: 3876 adev->gfx.config.double_offchip_lds_buf = 0; 3877 break; 3878 } 3879 } 3880 3881 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) 3882 { 3883 u32 tmp, sh_static_mem_cfg; 3884 int i; 3885 3886 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF); 3887 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3888 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3889 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); 3890 3891 gfx_v8_0_tiling_mode_table_init(adev); 3892 gfx_v8_0_setup_rb(adev); 3893 gfx_v8_0_get_cu_info(adev); 3894 gfx_v8_0_config_init(adev); 3895 3896 /* XXX SH_MEM regs */ 3897 /* where to put LDS, scratch, GPUVM in FSA64 space */ 3898 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG, 3899 SWIZZLE_ENABLE, 1); 3900 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3901 ELEMENT_SIZE, 1); 3902 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3903 INDEX_STRIDE, 3); 3904 mutex_lock(&adev->srbm_mutex); 3905 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { 3906 vi_srbm_select(adev, 0, 0, 0, i); 3907 /* CP and shaders */ 3908 if (i == 0) { 3909 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC); 3910 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3911 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3912 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3913 WREG32(mmSH_MEM_CONFIG, tmp); 3914 WREG32(mmSH_MEM_BASES, 0); 3915 } else { 3916 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC); 3917 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3918 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3919 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3920 WREG32(mmSH_MEM_CONFIG, tmp); 3921 tmp = adev->mc.shared_aperture_start >> 48; 3922 WREG32(mmSH_MEM_BASES, tmp); 3923 } 3924 3925 WREG32(mmSH_MEM_APE1_BASE, 1); 3926 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3927 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); 3928 } 3929 vi_srbm_select(adev, 0, 0, 0, 0); 3930 mutex_unlock(&adev->srbm_mutex); 3931 3932 gfx_v8_0_init_compute_vmid(adev); 3933 3934 mutex_lock(&adev->grbm_idx_mutex); 3935 /* 3936 * making sure that the following register writes will be broadcasted 3937 * to all the shaders 3938 */ 3939 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3940 3941 WREG32(mmPA_SC_FIFO_SIZE, 3942 (adev->gfx.config.sc_prim_fifo_size_frontend << 3943 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | 3944 (adev->gfx.config.sc_prim_fifo_size_backend << 3945 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | 3946 (adev->gfx.config.sc_hiz_tile_fifo_size << 3947 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | 3948 (adev->gfx.config.sc_earlyz_tile_fifo_size << 3949 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); 3950 3951 tmp = RREG32(mmSPI_ARB_PRIORITY); 3952 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); 3953 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); 3954 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); 3955 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2); 3956 WREG32(mmSPI_ARB_PRIORITY, tmp); 3957 3958 mutex_unlock(&adev->grbm_idx_mutex); 3959 3960 } 3961 3962 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 3963 { 3964 u32 i, j, k; 3965 u32 mask; 3966 3967 mutex_lock(&adev->grbm_idx_mutex); 3968 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3969 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3970 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3971 for (k = 0; k < adev->usec_timeout; k++) { 3972 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) 3973 break; 3974 udelay(1); 3975 } 3976 } 3977 } 3978 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3979 mutex_unlock(&adev->grbm_idx_mutex); 3980 3981 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 3982 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 3983 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 3984 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 3985 for (k = 0; k < adev->usec_timeout; k++) { 3986 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 3987 break; 3988 udelay(1); 3989 } 3990 } 3991 3992 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 3993 bool enable) 3994 { 3995 u32 tmp = RREG32(mmCP_INT_CNTL_RING0); 3996 3997 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 3998 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 3999 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 4000 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 4001 4002 WREG32(mmCP_INT_CNTL_RING0, tmp); 4003 } 4004 4005 static void gfx_v8_0_init_csb(struct amdgpu_device *adev) 4006 { 4007 /* csib */ 4008 WREG32(mmRLC_CSIB_ADDR_HI, 4009 adev->gfx.rlc.clear_state_gpu_addr >> 32); 4010 WREG32(mmRLC_CSIB_ADDR_LO, 4011 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 4012 WREG32(mmRLC_CSIB_LENGTH, 4013 adev->gfx.rlc.clear_state_size); 4014 } 4015 4016 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format, 4017 int ind_offset, 4018 int list_size, 4019 int *unique_indices, 4020 int *indices_count, 4021 int max_indices, 4022 int *ind_start_offsets, 4023 int *offset_count, 4024 int max_offset) 4025 { 4026 int indices; 4027 bool new_entry = true; 4028 4029 for (; ind_offset < list_size; ind_offset++) { 4030 4031 if (new_entry) { 4032 new_entry = false; 4033 ind_start_offsets[*offset_count] = ind_offset; 4034 *offset_count = *offset_count + 1; 4035 BUG_ON(*offset_count >= max_offset); 4036 } 4037 4038 if (register_list_format[ind_offset] == 0xFFFFFFFF) { 4039 new_entry = true; 4040 continue; 4041 } 4042 4043 ind_offset += 2; 4044 4045 /* look for the matching indice */ 4046 for (indices = 0; 4047 indices < *indices_count; 4048 indices++) { 4049 if (unique_indices[indices] == 4050 register_list_format[ind_offset]) 4051 break; 4052 } 4053 4054 if (indices >= *indices_count) { 4055 unique_indices[*indices_count] = 4056 register_list_format[ind_offset]; 4057 indices = *indices_count; 4058 *indices_count = *indices_count + 1; 4059 BUG_ON(*indices_count >= max_indices); 4060 } 4061 4062 register_list_format[ind_offset] = indices; 4063 } 4064 } 4065 4066 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) 4067 { 4068 int i, temp, data; 4069 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0}; 4070 int indices_count = 0; 4071 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 4072 int offset_count = 0; 4073 4074 int list_size; 4075 unsigned int *register_list_format = 4076 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 4077 if (!register_list_format) 4078 return -ENOMEM; 4079 memcpy(register_list_format, adev->gfx.rlc.register_list_format, 4080 adev->gfx.rlc.reg_list_format_size_bytes); 4081 4082 gfx_v8_0_parse_ind_reg_list(register_list_format, 4083 RLC_FormatDirectRegListLength, 4084 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 4085 unique_indices, 4086 &indices_count, 4087 sizeof(unique_indices) / sizeof(int), 4088 indirect_start_offsets, 4089 &offset_count, 4090 sizeof(indirect_start_offsets)/sizeof(int)); 4091 4092 /* save and restore list */ 4093 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1); 4094 4095 WREG32(mmRLC_SRM_ARAM_ADDR, 0); 4096 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 4097 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]); 4098 4099 /* indirect list */ 4100 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start); 4101 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) 4102 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]); 4103 4104 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 4105 list_size = list_size >> 1; 4106 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size); 4107 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size); 4108 4109 /* starting offsets starts */ 4110 WREG32(mmRLC_GPM_SCRATCH_ADDR, 4111 adev->gfx.rlc.starting_offsets_start); 4112 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) 4113 WREG32(mmRLC_GPM_SCRATCH_DATA, 4114 indirect_start_offsets[i]); 4115 4116 /* unique indices */ 4117 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; 4118 data = mmRLC_SRM_INDEX_CNTL_DATA_0; 4119 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { 4120 if (unique_indices[i] != 0) { 4121 WREG32(temp + i, unique_indices[i] & 0x3FFFF); 4122 WREG32(data + i, unique_indices[i] >> 20); 4123 } 4124 } 4125 kfree(register_list_format); 4126 4127 return 0; 4128 } 4129 4130 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev) 4131 { 4132 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1); 4133 } 4134 4135 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev) 4136 { 4137 uint32_t data; 4138 4139 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60); 4140 4141 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10); 4142 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10); 4143 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10); 4144 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10); 4145 WREG32(mmRLC_PG_DELAY, data); 4146 4147 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3); 4148 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0); 4149 4150 } 4151 4152 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 4153 bool enable) 4154 { 4155 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0); 4156 } 4157 4158 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 4159 bool enable) 4160 { 4161 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0); 4162 } 4163 4164 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable) 4165 { 4166 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1); 4167 } 4168 4169 static void gfx_v8_0_init_pg(struct amdgpu_device *adev) 4170 { 4171 if ((adev->asic_type == CHIP_CARRIZO) || 4172 (adev->asic_type == CHIP_STONEY)) { 4173 gfx_v8_0_init_csb(adev); 4174 gfx_v8_0_init_save_restore_list(adev); 4175 gfx_v8_0_enable_save_restore_machine(adev); 4176 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8); 4177 gfx_v8_0_init_power_gating(adev); 4178 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); 4179 } else if ((adev->asic_type == CHIP_POLARIS11) || 4180 (adev->asic_type == CHIP_POLARIS12)) { 4181 gfx_v8_0_init_csb(adev); 4182 gfx_v8_0_init_save_restore_list(adev); 4183 gfx_v8_0_enable_save_restore_machine(adev); 4184 gfx_v8_0_init_power_gating(adev); 4185 } 4186 4187 } 4188 4189 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev) 4190 { 4191 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0); 4192 4193 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4194 gfx_v8_0_wait_for_rlc_serdes(adev); 4195 } 4196 4197 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev) 4198 { 4199 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4200 udelay(50); 4201 4202 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 4203 udelay(50); 4204 } 4205 4206 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev) 4207 { 4208 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1); 4209 4210 /* carrizo do enable cp interrupt after cp inited */ 4211 if (!(adev->flags & AMD_IS_APU)) 4212 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 4213 4214 udelay(50); 4215 } 4216 4217 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev) 4218 { 4219 const struct rlc_firmware_header_v2_0 *hdr; 4220 const __le32 *fw_data; 4221 unsigned i, fw_size; 4222 4223 if (!adev->gfx.rlc_fw) 4224 return -EINVAL; 4225 4226 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 4227 amdgpu_ucode_print_rlc_hdr(&hdr->header); 4228 4229 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 4230 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 4231 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 4232 4233 WREG32(mmRLC_GPM_UCODE_ADDR, 0); 4234 for (i = 0; i < fw_size; i++) 4235 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 4236 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 4237 4238 return 0; 4239 } 4240 4241 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) 4242 { 4243 int r; 4244 u32 tmp; 4245 4246 gfx_v8_0_rlc_stop(adev); 4247 4248 /* disable CG */ 4249 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL); 4250 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 4251 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4252 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); 4253 if (adev->asic_type == CHIP_POLARIS11 || 4254 adev->asic_type == CHIP_POLARIS10 || 4255 adev->asic_type == CHIP_POLARIS12) { 4256 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D); 4257 tmp &= ~0x3; 4258 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp); 4259 } 4260 4261 /* disable PG */ 4262 WREG32(mmRLC_PG_CNTL, 0); 4263 4264 gfx_v8_0_rlc_reset(adev); 4265 gfx_v8_0_init_pg(adev); 4266 4267 if (!adev->pp_enabled) { 4268 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) { 4269 /* legacy rlc firmware loading */ 4270 r = gfx_v8_0_rlc_load_microcode(adev); 4271 if (r) 4272 return r; 4273 } else { 4274 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 4275 AMDGPU_UCODE_ID_RLC_G); 4276 if (r) 4277 return -EINVAL; 4278 } 4279 } 4280 4281 gfx_v8_0_rlc_start(adev); 4282 4283 return 0; 4284 } 4285 4286 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 4287 { 4288 int i; 4289 u32 tmp = RREG32(mmCP_ME_CNTL); 4290 4291 if (enable) { 4292 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0); 4293 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0); 4294 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0); 4295 } else { 4296 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); 4297 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); 4298 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); 4299 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 4300 adev->gfx.gfx_ring[i].ready = false; 4301 } 4302 WREG32(mmCP_ME_CNTL, tmp); 4303 udelay(50); 4304 } 4305 4306 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 4307 { 4308 const struct gfx_firmware_header_v1_0 *pfp_hdr; 4309 const struct gfx_firmware_header_v1_0 *ce_hdr; 4310 const struct gfx_firmware_header_v1_0 *me_hdr; 4311 const __le32 *fw_data; 4312 unsigned i, fw_size; 4313 4314 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 4315 return -EINVAL; 4316 4317 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 4318 adev->gfx.pfp_fw->data; 4319 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 4320 adev->gfx.ce_fw->data; 4321 me_hdr = (const struct gfx_firmware_header_v1_0 *) 4322 adev->gfx.me_fw->data; 4323 4324 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 4325 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 4326 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 4327 4328 gfx_v8_0_cp_gfx_enable(adev, false); 4329 4330 /* PFP */ 4331 fw_data = (const __le32 *) 4332 (adev->gfx.pfp_fw->data + 4333 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 4334 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 4335 WREG32(mmCP_PFP_UCODE_ADDR, 0); 4336 for (i = 0; i < fw_size; i++) 4337 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 4338 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 4339 4340 /* CE */ 4341 fw_data = (const __le32 *) 4342 (adev->gfx.ce_fw->data + 4343 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 4344 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 4345 WREG32(mmCP_CE_UCODE_ADDR, 0); 4346 for (i = 0; i < fw_size; i++) 4347 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 4348 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 4349 4350 /* ME */ 4351 fw_data = (const __le32 *) 4352 (adev->gfx.me_fw->data + 4353 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 4354 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 4355 WREG32(mmCP_ME_RAM_WADDR, 0); 4356 for (i = 0; i < fw_size; i++) 4357 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 4358 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 4359 4360 return 0; 4361 } 4362 4363 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev) 4364 { 4365 u32 count = 0; 4366 const struct cs_section_def *sect = NULL; 4367 const struct cs_extent_def *ext = NULL; 4368 4369 /* begin clear state */ 4370 count += 2; 4371 /* context control state */ 4372 count += 3; 4373 4374 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4375 for (ext = sect->section; ext->extent != NULL; ++ext) { 4376 if (sect->id == SECT_CONTEXT) 4377 count += 2 + ext->reg_count; 4378 else 4379 return 0; 4380 } 4381 } 4382 /* pa_sc_raster_config/pa_sc_raster_config1 */ 4383 count += 4; 4384 /* end clear state */ 4385 count += 2; 4386 /* clear state */ 4387 count += 2; 4388 4389 return count; 4390 } 4391 4392 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) 4393 { 4394 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 4395 const struct cs_section_def *sect = NULL; 4396 const struct cs_extent_def *ext = NULL; 4397 int r, i; 4398 4399 /* init the CP */ 4400 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 4401 WREG32(mmCP_ENDIAN_SWAP, 0); 4402 WREG32(mmCP_DEVICE_ID, 1); 4403 4404 gfx_v8_0_cp_gfx_enable(adev, true); 4405 4406 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); 4407 if (r) { 4408 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 4409 return r; 4410 } 4411 4412 /* clear state buffer */ 4413 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4414 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 4415 4416 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4417 amdgpu_ring_write(ring, 0x80000000); 4418 amdgpu_ring_write(ring, 0x80000000); 4419 4420 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4421 for (ext = sect->section; ext->extent != NULL; ++ext) { 4422 if (sect->id == SECT_CONTEXT) { 4423 amdgpu_ring_write(ring, 4424 PACKET3(PACKET3_SET_CONTEXT_REG, 4425 ext->reg_count)); 4426 amdgpu_ring_write(ring, 4427 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 4428 for (i = 0; i < ext->reg_count; i++) 4429 amdgpu_ring_write(ring, ext->extent[i]); 4430 } 4431 } 4432 } 4433 4434 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 4435 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 4436 switch (adev->asic_type) { 4437 case CHIP_TONGA: 4438 case CHIP_POLARIS10: 4439 amdgpu_ring_write(ring, 0x16000012); 4440 amdgpu_ring_write(ring, 0x0000002A); 4441 break; 4442 case CHIP_POLARIS11: 4443 case CHIP_POLARIS12: 4444 amdgpu_ring_write(ring, 0x16000012); 4445 amdgpu_ring_write(ring, 0x00000000); 4446 break; 4447 case CHIP_FIJI: 4448 amdgpu_ring_write(ring, 0x3a00161a); 4449 amdgpu_ring_write(ring, 0x0000002e); 4450 break; 4451 case CHIP_CARRIZO: 4452 amdgpu_ring_write(ring, 0x00000002); 4453 amdgpu_ring_write(ring, 0x00000000); 4454 break; 4455 case CHIP_TOPAZ: 4456 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ? 4457 0x00000000 : 0x00000002); 4458 amdgpu_ring_write(ring, 0x00000000); 4459 break; 4460 case CHIP_STONEY: 4461 amdgpu_ring_write(ring, 0x00000000); 4462 amdgpu_ring_write(ring, 0x00000000); 4463 break; 4464 default: 4465 BUG(); 4466 } 4467 4468 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4469 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 4470 4471 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 4472 amdgpu_ring_write(ring, 0); 4473 4474 /* init the CE partitions */ 4475 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 4476 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 4477 amdgpu_ring_write(ring, 0x8000); 4478 amdgpu_ring_write(ring, 0x8000); 4479 4480 amdgpu_ring_commit(ring); 4481 4482 return 0; 4483 } 4484 4485 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) 4486 { 4487 struct amdgpu_ring *ring; 4488 u32 tmp; 4489 u32 rb_bufsz; 4490 u64 rb_addr, rptr_addr, wptr_gpu_addr; 4491 int r; 4492 4493 /* Set the write pointer delay */ 4494 WREG32(mmCP_RB_WPTR_DELAY, 0); 4495 4496 /* set the RB to use vmid 0 */ 4497 WREG32(mmCP_RB_VMID, 0); 4498 4499 /* Set ring buffer size */ 4500 ring = &adev->gfx.gfx_ring[0]; 4501 rb_bufsz = order_base_2(ring->ring_size / 8); 4502 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 4503 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 4504 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3); 4505 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1); 4506 #ifdef __BIG_ENDIAN 4507 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 4508 #endif 4509 WREG32(mmCP_RB0_CNTL, tmp); 4510 4511 /* Initialize the ring buffer's read and write pointers */ 4512 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); 4513 ring->wptr = 0; 4514 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4515 4516 /* set the wb address wether it's enabled or not */ 4517 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4518 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 4519 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); 4520 4521 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4522 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 4523 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 4524 mdelay(1); 4525 WREG32(mmCP_RB0_CNTL, tmp); 4526 4527 rb_addr = ring->gpu_addr >> 8; 4528 WREG32(mmCP_RB0_BASE, rb_addr); 4529 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 4530 4531 /* no gfx doorbells on iceland */ 4532 if (adev->asic_type != CHIP_TOPAZ) { 4533 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); 4534 if (ring->use_doorbell) { 4535 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4536 DOORBELL_OFFSET, ring->doorbell_index); 4537 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4538 DOORBELL_HIT, 0); 4539 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4540 DOORBELL_EN, 1); 4541 } else { 4542 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4543 DOORBELL_EN, 0); 4544 } 4545 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); 4546 4547 if (adev->asic_type == CHIP_TONGA) { 4548 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 4549 DOORBELL_RANGE_LOWER, 4550 AMDGPU_DOORBELL_GFX_RING0); 4551 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 4552 4553 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, 4554 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 4555 } 4556 4557 } 4558 4559 /* start the ring */ 4560 amdgpu_ring_clear_ring(ring); 4561 gfx_v8_0_cp_gfx_start(adev); 4562 ring->ready = true; 4563 r = amdgpu_ring_test_ring(ring); 4564 if (r) 4565 ring->ready = false; 4566 4567 return r; 4568 } 4569 4570 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 4571 { 4572 int i; 4573 4574 if (enable) { 4575 WREG32(mmCP_MEC_CNTL, 0); 4576 } else { 4577 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 4578 for (i = 0; i < adev->gfx.num_compute_rings; i++) 4579 adev->gfx.compute_ring[i].ready = false; 4580 adev->gfx.kiq.ring.ready = false; 4581 } 4582 udelay(50); 4583 } 4584 4585 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 4586 { 4587 const struct gfx_firmware_header_v1_0 *mec_hdr; 4588 const __le32 *fw_data; 4589 unsigned i, fw_size; 4590 4591 if (!adev->gfx.mec_fw) 4592 return -EINVAL; 4593 4594 gfx_v8_0_cp_compute_enable(adev, false); 4595 4596 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4597 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4598 4599 fw_data = (const __le32 *) 4600 (adev->gfx.mec_fw->data + 4601 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4602 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4603 4604 /* MEC1 */ 4605 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4606 for (i = 0; i < fw_size; i++) 4607 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4608 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4609 4610 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 4611 if (adev->gfx.mec2_fw) { 4612 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4613 4614 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4615 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4616 4617 fw_data = (const __le32 *) 4618 (adev->gfx.mec2_fw->data + 4619 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4620 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4621 4622 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4623 for (i = 0; i < fw_size; i++) 4624 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4625 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4626 } 4627 4628 return 0; 4629 } 4630 4631 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev) 4632 { 4633 int i, r; 4634 4635 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4636 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4637 4638 if (ring->mqd_obj) { 4639 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4640 if (unlikely(r != 0)) 4641 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); 4642 4643 amdgpu_bo_unpin(ring->mqd_obj); 4644 amdgpu_bo_unreserve(ring->mqd_obj); 4645 4646 amdgpu_bo_unref(&ring->mqd_obj); 4647 ring->mqd_obj = NULL; 4648 ring->mqd_ptr = NULL; 4649 ring->mqd_gpu_addr = 0; 4650 } 4651 } 4652 } 4653 4654 /* KIQ functions */ 4655 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) 4656 { 4657 uint32_t tmp; 4658 struct amdgpu_device *adev = ring->adev; 4659 4660 /* tell RLC which is KIQ queue */ 4661 tmp = RREG32(mmRLC_CP_SCHEDULERS); 4662 tmp &= 0xffffff00; 4663 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4664 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4665 tmp |= 0x80; 4666 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4667 } 4668 4669 static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring) 4670 { 4671 amdgpu_ring_alloc(ring, 8); 4672 /* set resources */ 4673 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 4674 amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ 4675 amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */ 4676 amdgpu_ring_write(ring, 0); /* queue mask hi */ 4677 amdgpu_ring_write(ring, 0); /* gws mask lo */ 4678 amdgpu_ring_write(ring, 0); /* gws mask hi */ 4679 amdgpu_ring_write(ring, 0); /* oac mask */ 4680 amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */ 4681 amdgpu_ring_commit(ring); 4682 udelay(50); 4683 } 4684 4685 static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring, 4686 struct amdgpu_ring *ring) 4687 { 4688 struct amdgpu_device *adev = kiq_ring->adev; 4689 uint64_t mqd_addr, wptr_addr; 4690 4691 mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 4692 wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4693 amdgpu_ring_alloc(kiq_ring, 8); 4694 4695 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 4696 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 4697 amdgpu_ring_write(kiq_ring, 0x21010000); 4698 amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) | 4699 (ring->queue << 26) | 4700 (ring->pipe << 29) | 4701 ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */ 4702 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 4703 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 4704 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 4705 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4706 amdgpu_ring_commit(kiq_ring); 4707 udelay(50); 4708 } 4709 4710 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) 4711 { 4712 struct amdgpu_device *adev = ring->adev; 4713 struct vi_mqd *mqd = ring->mqd_ptr; 4714 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4715 uint32_t tmp; 4716 4717 mqd->header = 0xC0310800; 4718 mqd->compute_pipelinestat_enable = 0x00000001; 4719 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4720 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4721 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4722 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4723 mqd->compute_misc_reserved = 0x00000003; 4724 4725 eop_base_addr = ring->eop_gpu_addr >> 8; 4726 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4727 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4728 4729 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4730 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4731 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4732 (order_base_2(MEC_HPD_SIZE / 4) - 1)); 4733 4734 mqd->cp_hqd_eop_control = tmp; 4735 4736 /* enable doorbell? */ 4737 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), 4738 CP_HQD_PQ_DOORBELL_CONTROL, 4739 DOORBELL_EN, 4740 ring->use_doorbell ? 1 : 0); 4741 4742 mqd->cp_hqd_pq_doorbell_control = tmp; 4743 4744 /* disable the queue if it's active */ 4745 mqd->cp_hqd_dequeue_request = 0; 4746 mqd->cp_hqd_pq_rptr = 0; 4747 mqd->cp_hqd_pq_wptr = 0; 4748 4749 /* set the pointer to the MQD */ 4750 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 4751 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 4752 4753 /* set MQD vmid to 0 */ 4754 tmp = RREG32(mmCP_MQD_CONTROL); 4755 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4756 mqd->cp_mqd_control = tmp; 4757 4758 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4759 hqd_gpu_addr = ring->gpu_addr >> 8; 4760 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4761 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4762 4763 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4764 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4765 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4766 (order_base_2(ring->ring_size / 4) - 1)); 4767 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4768 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4769 #ifdef __BIG_ENDIAN 4770 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4771 #endif 4772 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4773 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4774 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4775 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4776 mqd->cp_hqd_pq_control = tmp; 4777 4778 /* set the wb address whether it's enabled or not */ 4779 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4780 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4781 mqd->cp_hqd_pq_rptr_report_addr_hi = 4782 upper_32_bits(wb_gpu_addr) & 0xffff; 4783 4784 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4785 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4786 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4787 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4788 4789 tmp = 0; 4790 /* enable the doorbell if requested */ 4791 if (ring->use_doorbell) { 4792 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4793 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4794 DOORBELL_OFFSET, ring->doorbell_index); 4795 4796 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4797 DOORBELL_EN, 1); 4798 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4799 DOORBELL_SOURCE, 0); 4800 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4801 DOORBELL_HIT, 0); 4802 } 4803 4804 mqd->cp_hqd_pq_doorbell_control = tmp; 4805 4806 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4807 ring->wptr = 0; 4808 mqd->cp_hqd_pq_wptr = ring->wptr; 4809 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4810 4811 /* set the vmid for the queue */ 4812 mqd->cp_hqd_vmid = 0; 4813 4814 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4815 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4816 mqd->cp_hqd_persistent_state = tmp; 4817 4818 /* activate the queue */ 4819 mqd->cp_hqd_active = 1; 4820 4821 return 0; 4822 } 4823 4824 static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring) 4825 { 4826 struct amdgpu_device *adev = ring->adev; 4827 struct vi_mqd *mqd = ring->mqd_ptr; 4828 int j; 4829 4830 /* disable wptr polling */ 4831 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); 4832 4833 WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); 4834 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); 4835 4836 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4837 WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); 4838 4839 /* enable doorbell? */ 4840 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); 4841 4842 /* disable the queue if it's active */ 4843 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 4844 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); 4845 for (j = 0; j < adev->usec_timeout; j++) { 4846 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 4847 break; 4848 udelay(1); 4849 } 4850 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); 4851 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); 4852 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 4853 } 4854 4855 /* set the pointer to the MQD */ 4856 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); 4857 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 4858 4859 /* set MQD vmid to 0 */ 4860 WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control); 4861 4862 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4863 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); 4864 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); 4865 4866 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4867 WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); 4868 4869 /* set the wb address whether it's enabled or not */ 4870 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, 4871 mqd->cp_hqd_pq_rptr_report_addr_lo); 4872 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4873 mqd->cp_hqd_pq_rptr_report_addr_hi); 4874 4875 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4876 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); 4877 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); 4878 4879 /* enable the doorbell if requested */ 4880 if (ring->use_doorbell) { 4881 if ((adev->asic_type == CHIP_CARRIZO) || 4882 (adev->asic_type == CHIP_FIJI) || 4883 (adev->asic_type == CHIP_STONEY)) { 4884 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, 4885 AMDGPU_DOORBELL_KIQ << 2); 4886 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, 4887 AMDGPU_DOORBELL_MEC_RING7 << 2); 4888 } 4889 } 4890 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); 4891 4892 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4893 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 4894 4895 /* set the vmid for the queue */ 4896 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); 4897 4898 WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); 4899 4900 /* activate the queue */ 4901 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); 4902 4903 if (ring->use_doorbell) 4904 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4905 4906 return 0; 4907 } 4908 4909 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) 4910 { 4911 struct amdgpu_device *adev = ring->adev; 4912 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 4913 struct vi_mqd *mqd = ring->mqd_ptr; 4914 bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ); 4915 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 4916 4917 if (is_kiq) { 4918 gfx_v8_0_kiq_setting(&kiq->ring); 4919 } else { 4920 mqd_idx = ring - &adev->gfx.compute_ring[0]; 4921 } 4922 4923 if (!adev->gfx.in_reset) { 4924 memset((void *)mqd, 0, sizeof(*mqd)); 4925 mutex_lock(&adev->srbm_mutex); 4926 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4927 gfx_v8_0_mqd_init(ring); 4928 if (is_kiq) 4929 gfx_v8_0_kiq_init_register(ring); 4930 vi_srbm_select(adev, 0, 0, 0, 0); 4931 mutex_unlock(&adev->srbm_mutex); 4932 4933 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4934 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4935 } else { /* for GPU_RESET case */ 4936 /* reset MQD to a clean status */ 4937 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4938 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4939 4940 /* reset ring buffer */ 4941 ring->wptr = 0; 4942 amdgpu_ring_clear_ring(ring); 4943 4944 if (is_kiq) { 4945 mutex_lock(&adev->srbm_mutex); 4946 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4947 gfx_v8_0_kiq_init_register(ring); 4948 vi_srbm_select(adev, 0, 0, 0, 0); 4949 mutex_unlock(&adev->srbm_mutex); 4950 } 4951 } 4952 4953 if (is_kiq) 4954 gfx_v8_0_kiq_enable(ring); 4955 else 4956 gfx_v8_0_map_queue_enable(&kiq->ring, ring); 4957 4958 return 0; 4959 } 4960 4961 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) 4962 { 4963 struct amdgpu_ring *ring = NULL; 4964 int r = 0, i; 4965 4966 gfx_v8_0_cp_compute_enable(adev, true); 4967 4968 ring = &adev->gfx.kiq.ring; 4969 4970 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4971 if (unlikely(r != 0)) 4972 goto done; 4973 4974 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4975 if (!r) { 4976 r = gfx_v8_0_kiq_init_queue(ring); 4977 amdgpu_bo_kunmap(ring->mqd_obj); 4978 ring->mqd_ptr = NULL; 4979 } 4980 amdgpu_bo_unreserve(ring->mqd_obj); 4981 if (r) 4982 goto done; 4983 4984 ring->ready = true; 4985 r = amdgpu_ring_test_ring(ring); 4986 if (r) { 4987 ring->ready = false; 4988 goto done; 4989 } 4990 4991 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4992 ring = &adev->gfx.compute_ring[i]; 4993 4994 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4995 if (unlikely(r != 0)) 4996 goto done; 4997 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4998 if (!r) { 4999 r = gfx_v8_0_kiq_init_queue(ring); 5000 amdgpu_bo_kunmap(ring->mqd_obj); 5001 ring->mqd_ptr = NULL; 5002 } 5003 amdgpu_bo_unreserve(ring->mqd_obj); 5004 if (r) 5005 goto done; 5006 5007 ring->ready = true; 5008 r = amdgpu_ring_test_ring(ring); 5009 if (r) 5010 ring->ready = false; 5011 } 5012 5013 done: 5014 return r; 5015 } 5016 5017 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) 5018 { 5019 int r, i, j; 5020 u32 tmp; 5021 bool use_doorbell = true; 5022 u64 hqd_gpu_addr; 5023 u64 mqd_gpu_addr; 5024 u64 eop_gpu_addr; 5025 u64 wb_gpu_addr; 5026 u32 *buf; 5027 struct vi_mqd *mqd; 5028 5029 /* init the queues. */ 5030 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5031 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5032 5033 if (ring->mqd_obj == NULL) { 5034 r = amdgpu_bo_create(adev, 5035 sizeof(struct vi_mqd), 5036 PAGE_SIZE, true, 5037 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, 5038 NULL, &ring->mqd_obj); 5039 if (r) { 5040 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); 5041 return r; 5042 } 5043 } 5044 5045 r = amdgpu_bo_reserve(ring->mqd_obj, false); 5046 if (unlikely(r != 0)) { 5047 gfx_v8_0_cp_compute_fini(adev); 5048 return r; 5049 } 5050 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, 5051 &mqd_gpu_addr); 5052 if (r) { 5053 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); 5054 gfx_v8_0_cp_compute_fini(adev); 5055 return r; 5056 } 5057 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); 5058 if (r) { 5059 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); 5060 gfx_v8_0_cp_compute_fini(adev); 5061 return r; 5062 } 5063 5064 /* init the mqd struct */ 5065 memset(buf, 0, sizeof(struct vi_mqd)); 5066 5067 mqd = (struct vi_mqd *)buf; 5068 mqd->header = 0xC0310800; 5069 mqd->compute_pipelinestat_enable = 0x00000001; 5070 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 5071 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 5072 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 5073 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 5074 mqd->compute_misc_reserved = 0x00000003; 5075 5076 mutex_lock(&adev->srbm_mutex); 5077 vi_srbm_select(adev, ring->me, 5078 ring->pipe, 5079 ring->queue, 0); 5080 5081 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); 5082 eop_gpu_addr >>= 8; 5083 5084 /* write the EOP addr */ 5085 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr); 5086 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); 5087 5088 /* set the VMID assigned */ 5089 WREG32(mmCP_HQD_VMID, 0); 5090 5091 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 5092 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 5093 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 5094 (order_base_2(MEC_HPD_SIZE / 4) - 1)); 5095 WREG32(mmCP_HQD_EOP_CONTROL, tmp); 5096 5097 /* disable wptr polling */ 5098 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); 5099 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); 5100 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); 5101 5102 mqd->cp_hqd_eop_base_addr_lo = 5103 RREG32(mmCP_HQD_EOP_BASE_ADDR); 5104 mqd->cp_hqd_eop_base_addr_hi = 5105 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI); 5106 5107 /* enable doorbell? */ 5108 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 5109 if (use_doorbell) { 5110 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 5111 } else { 5112 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); 5113 } 5114 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); 5115 mqd->cp_hqd_pq_doorbell_control = tmp; 5116 5117 /* disable the queue if it's active */ 5118 mqd->cp_hqd_dequeue_request = 0; 5119 mqd->cp_hqd_pq_rptr = 0; 5120 mqd->cp_hqd_pq_wptr= 0; 5121 if (RREG32(mmCP_HQD_ACTIVE) & 1) { 5122 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); 5123 for (j = 0; j < adev->usec_timeout; j++) { 5124 if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) 5125 break; 5126 udelay(1); 5127 } 5128 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); 5129 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); 5130 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 5131 } 5132 5133 /* set the pointer to the MQD */ 5134 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; 5135 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); 5136 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); 5137 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 5138 5139 /* set MQD vmid to 0 */ 5140 tmp = RREG32(mmCP_MQD_CONTROL); 5141 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 5142 WREG32(mmCP_MQD_CONTROL, tmp); 5143 mqd->cp_mqd_control = tmp; 5144 5145 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 5146 hqd_gpu_addr = ring->gpu_addr >> 8; 5147 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 5148 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 5149 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); 5150 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); 5151 5152 /* set up the HQD, this is similar to CP_RB0_CNTL */ 5153 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 5154 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 5155 (order_base_2(ring->ring_size / 4) - 1)); 5156 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 5157 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 5158 #ifdef __BIG_ENDIAN 5159 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 5160 #endif 5161 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 5162 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 5163 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 5164 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 5165 WREG32(mmCP_HQD_PQ_CONTROL, tmp); 5166 mqd->cp_hqd_pq_control = tmp; 5167 5168 /* set the wb address wether it's enabled or not */ 5169 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 5170 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 5171 mqd->cp_hqd_pq_rptr_report_addr_hi = 5172 upper_32_bits(wb_gpu_addr) & 0xffff; 5173 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, 5174 mqd->cp_hqd_pq_rptr_report_addr_lo); 5175 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 5176 mqd->cp_hqd_pq_rptr_report_addr_hi); 5177 5178 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 5179 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 5180 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 5181 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 5182 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); 5183 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 5184 mqd->cp_hqd_pq_wptr_poll_addr_hi); 5185 5186 /* enable the doorbell if requested */ 5187 if (use_doorbell) { 5188 if ((adev->asic_type == CHIP_CARRIZO) || 5189 (adev->asic_type == CHIP_FIJI) || 5190 (adev->asic_type == CHIP_STONEY) || 5191 (adev->asic_type == CHIP_POLARIS11) || 5192 (adev->asic_type == CHIP_POLARIS10) || 5193 (adev->asic_type == CHIP_POLARIS12)) { 5194 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, 5195 AMDGPU_DOORBELL_KIQ << 2); 5196 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, 5197 AMDGPU_DOORBELL_MEC_RING7 << 2); 5198 } 5199 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 5200 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 5201 DOORBELL_OFFSET, ring->doorbell_index); 5202 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 5203 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); 5204 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); 5205 mqd->cp_hqd_pq_doorbell_control = tmp; 5206 5207 } else { 5208 mqd->cp_hqd_pq_doorbell_control = 0; 5209 } 5210 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 5211 mqd->cp_hqd_pq_doorbell_control); 5212 5213 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 5214 ring->wptr = 0; 5215 mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); 5216 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 5217 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 5218 5219 /* set the vmid for the queue */ 5220 mqd->cp_hqd_vmid = 0; 5221 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); 5222 5223 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 5224 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 5225 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); 5226 mqd->cp_hqd_persistent_state = tmp; 5227 if (adev->asic_type == CHIP_STONEY || 5228 adev->asic_type == CHIP_POLARIS11 || 5229 adev->asic_type == CHIP_POLARIS10 || 5230 adev->asic_type == CHIP_POLARIS12) { 5231 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); 5232 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); 5233 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); 5234 } 5235 5236 /* activate the queue */ 5237 mqd->cp_hqd_active = 1; 5238 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); 5239 5240 vi_srbm_select(adev, 0, 0, 0, 0); 5241 mutex_unlock(&adev->srbm_mutex); 5242 5243 amdgpu_bo_kunmap(ring->mqd_obj); 5244 amdgpu_bo_unreserve(ring->mqd_obj); 5245 } 5246 5247 if (use_doorbell) { 5248 tmp = RREG32(mmCP_PQ_STATUS); 5249 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 5250 WREG32(mmCP_PQ_STATUS, tmp); 5251 } 5252 5253 gfx_v8_0_cp_compute_enable(adev, true); 5254 5255 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5256 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5257 5258 ring->ready = true; 5259 r = amdgpu_ring_test_ring(ring); 5260 if (r) 5261 ring->ready = false; 5262 } 5263 5264 return 0; 5265 } 5266 5267 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 5268 { 5269 int r; 5270 5271 if (!(adev->flags & AMD_IS_APU)) 5272 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5273 5274 if (!adev->pp_enabled) { 5275 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) { 5276 /* legacy firmware loading */ 5277 r = gfx_v8_0_cp_gfx_load_microcode(adev); 5278 if (r) 5279 return r; 5280 5281 r = gfx_v8_0_cp_compute_load_microcode(adev); 5282 if (r) 5283 return r; 5284 } else { 5285 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5286 AMDGPU_UCODE_ID_CP_CE); 5287 if (r) 5288 return -EINVAL; 5289 5290 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5291 AMDGPU_UCODE_ID_CP_PFP); 5292 if (r) 5293 return -EINVAL; 5294 5295 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5296 AMDGPU_UCODE_ID_CP_ME); 5297 if (r) 5298 return -EINVAL; 5299 5300 if (adev->asic_type == CHIP_TOPAZ) { 5301 r = gfx_v8_0_cp_compute_load_microcode(adev); 5302 if (r) 5303 return r; 5304 } else { 5305 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5306 AMDGPU_UCODE_ID_CP_MEC1); 5307 if (r) 5308 return -EINVAL; 5309 } 5310 } 5311 } 5312 5313 r = gfx_v8_0_cp_gfx_resume(adev); 5314 if (r) 5315 return r; 5316 5317 if (amdgpu_sriov_vf(adev)) 5318 r = gfx_v8_0_kiq_resume(adev); 5319 else 5320 r = gfx_v8_0_cp_compute_resume(adev); 5321 if (r) 5322 return r; 5323 5324 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5325 5326 return 0; 5327 } 5328 5329 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 5330 { 5331 gfx_v8_0_cp_gfx_enable(adev, enable); 5332 gfx_v8_0_cp_compute_enable(adev, enable); 5333 } 5334 5335 static int gfx_v8_0_hw_init(void *handle) 5336 { 5337 int r; 5338 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5339 5340 gfx_v8_0_init_golden_registers(adev); 5341 gfx_v8_0_gpu_init(adev); 5342 5343 r = gfx_v8_0_rlc_resume(adev); 5344 if (r) 5345 return r; 5346 5347 r = gfx_v8_0_cp_resume(adev); 5348 5349 return r; 5350 } 5351 5352 static int gfx_v8_0_hw_fini(void *handle) 5353 { 5354 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5355 5356 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5357 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5358 if (amdgpu_sriov_vf(adev)) { 5359 pr_debug("For SRIOV client, shouldn't do anything.\n"); 5360 return 0; 5361 } 5362 gfx_v8_0_cp_enable(adev, false); 5363 gfx_v8_0_rlc_stop(adev); 5364 gfx_v8_0_cp_compute_fini(adev); 5365 5366 amdgpu_set_powergating_state(adev, 5367 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); 5368 5369 return 0; 5370 } 5371 5372 static int gfx_v8_0_suspend(void *handle) 5373 { 5374 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5375 5376 return gfx_v8_0_hw_fini(adev); 5377 } 5378 5379 static int gfx_v8_0_resume(void *handle) 5380 { 5381 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5382 5383 return gfx_v8_0_hw_init(adev); 5384 } 5385 5386 static bool gfx_v8_0_is_idle(void *handle) 5387 { 5388 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5389 5390 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5391 return false; 5392 else 5393 return true; 5394 } 5395 5396 static int gfx_v8_0_wait_for_idle(void *handle) 5397 { 5398 unsigned i; 5399 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5400 5401 for (i = 0; i < adev->usec_timeout; i++) { 5402 if (gfx_v8_0_is_idle(handle)) 5403 return 0; 5404 5405 udelay(1); 5406 } 5407 return -ETIMEDOUT; 5408 } 5409 5410 static bool gfx_v8_0_check_soft_reset(void *handle) 5411 { 5412 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5413 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5414 u32 tmp; 5415 5416 /* GRBM_STATUS */ 5417 tmp = RREG32(mmGRBM_STATUS); 5418 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5419 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5420 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5421 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5422 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5423 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5424 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 5425 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5426 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 5427 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5428 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 5429 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5430 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5431 } 5432 5433 /* GRBM_STATUS2 */ 5434 tmp = RREG32(mmGRBM_STATUS2); 5435 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 5436 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5437 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 5438 5439 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 5440 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 5441 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 5442 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5443 SOFT_RESET_CPF, 1); 5444 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5445 SOFT_RESET_CPC, 1); 5446 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5447 SOFT_RESET_CPG, 1); 5448 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 5449 SOFT_RESET_GRBM, 1); 5450 } 5451 5452 /* SRBM_STATUS */ 5453 tmp = RREG32(mmSRBM_STATUS); 5454 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 5455 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5456 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5457 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 5458 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5459 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5460 5461 if (grbm_soft_reset || srbm_soft_reset) { 5462 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5463 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5464 return true; 5465 } else { 5466 adev->gfx.grbm_soft_reset = 0; 5467 adev->gfx.srbm_soft_reset = 0; 5468 return false; 5469 } 5470 } 5471 5472 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev, 5473 struct amdgpu_ring *ring) 5474 { 5475 int i; 5476 5477 mutex_lock(&adev->srbm_mutex); 5478 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5479 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 5480 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, 2); 5481 for (i = 0; i < adev->usec_timeout; i++) { 5482 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 5483 break; 5484 udelay(1); 5485 } 5486 } 5487 vi_srbm_select(adev, 0, 0, 0, 0); 5488 mutex_unlock(&adev->srbm_mutex); 5489 } 5490 5491 static int gfx_v8_0_pre_soft_reset(void *handle) 5492 { 5493 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5494 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5495 5496 if ((!adev->gfx.grbm_soft_reset) && 5497 (!adev->gfx.srbm_soft_reset)) 5498 return 0; 5499 5500 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5501 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5502 5503 /* stop the rlc */ 5504 gfx_v8_0_rlc_stop(adev); 5505 5506 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5507 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5508 /* Disable GFX parsing/prefetching */ 5509 gfx_v8_0_cp_gfx_enable(adev, false); 5510 5511 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5512 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5513 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5514 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5515 int i; 5516 5517 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5518 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5519 5520 gfx_v8_0_inactive_hqd(adev, ring); 5521 } 5522 /* Disable MEC parsing/prefetching */ 5523 gfx_v8_0_cp_compute_enable(adev, false); 5524 } 5525 5526 return 0; 5527 } 5528 5529 static int gfx_v8_0_soft_reset(void *handle) 5530 { 5531 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5532 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5533 u32 tmp; 5534 5535 if ((!adev->gfx.grbm_soft_reset) && 5536 (!adev->gfx.srbm_soft_reset)) 5537 return 0; 5538 5539 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5540 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5541 5542 if (grbm_soft_reset || srbm_soft_reset) { 5543 tmp = RREG32(mmGMCON_DEBUG); 5544 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5545 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5546 WREG32(mmGMCON_DEBUG, tmp); 5547 udelay(50); 5548 } 5549 5550 if (grbm_soft_reset) { 5551 tmp = RREG32(mmGRBM_SOFT_RESET); 5552 tmp |= grbm_soft_reset; 5553 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5554 WREG32(mmGRBM_SOFT_RESET, tmp); 5555 tmp = RREG32(mmGRBM_SOFT_RESET); 5556 5557 udelay(50); 5558 5559 tmp &= ~grbm_soft_reset; 5560 WREG32(mmGRBM_SOFT_RESET, tmp); 5561 tmp = RREG32(mmGRBM_SOFT_RESET); 5562 } 5563 5564 if (srbm_soft_reset) { 5565 tmp = RREG32(mmSRBM_SOFT_RESET); 5566 tmp |= srbm_soft_reset; 5567 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5568 WREG32(mmSRBM_SOFT_RESET, tmp); 5569 tmp = RREG32(mmSRBM_SOFT_RESET); 5570 5571 udelay(50); 5572 5573 tmp &= ~srbm_soft_reset; 5574 WREG32(mmSRBM_SOFT_RESET, tmp); 5575 tmp = RREG32(mmSRBM_SOFT_RESET); 5576 } 5577 5578 if (grbm_soft_reset || srbm_soft_reset) { 5579 tmp = RREG32(mmGMCON_DEBUG); 5580 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5581 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5582 WREG32(mmGMCON_DEBUG, tmp); 5583 } 5584 5585 /* Wait a little for things to settle down */ 5586 udelay(50); 5587 5588 return 0; 5589 } 5590 5591 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev, 5592 struct amdgpu_ring *ring) 5593 { 5594 mutex_lock(&adev->srbm_mutex); 5595 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5596 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 5597 WREG32(mmCP_HQD_PQ_RPTR, 0); 5598 WREG32(mmCP_HQD_PQ_WPTR, 0); 5599 vi_srbm_select(adev, 0, 0, 0, 0); 5600 mutex_unlock(&adev->srbm_mutex); 5601 } 5602 5603 static int gfx_v8_0_post_soft_reset(void *handle) 5604 { 5605 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5606 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5607 5608 if ((!adev->gfx.grbm_soft_reset) && 5609 (!adev->gfx.srbm_soft_reset)) 5610 return 0; 5611 5612 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5613 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5614 5615 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5616 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5617 gfx_v8_0_cp_gfx_resume(adev); 5618 5619 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5620 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5621 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5622 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5623 int i; 5624 5625 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5626 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5627 5628 gfx_v8_0_init_hqd(adev, ring); 5629 } 5630 gfx_v8_0_cp_compute_resume(adev); 5631 } 5632 gfx_v8_0_rlc_start(adev); 5633 5634 return 0; 5635 } 5636 5637 /** 5638 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5639 * 5640 * @adev: amdgpu_device pointer 5641 * 5642 * Fetches a GPU clock counter snapshot. 5643 * Returns the 64 bit clock counter snapshot. 5644 */ 5645 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5646 { 5647 uint64_t clock; 5648 5649 mutex_lock(&adev->gfx.gpu_clock_mutex); 5650 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5651 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5652 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5653 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5654 return clock; 5655 } 5656 5657 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5658 uint32_t vmid, 5659 uint32_t gds_base, uint32_t gds_size, 5660 uint32_t gws_base, uint32_t gws_size, 5661 uint32_t oa_base, uint32_t oa_size) 5662 { 5663 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 5664 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 5665 5666 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 5667 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 5668 5669 oa_base = oa_base >> AMDGPU_OA_SHIFT; 5670 oa_size = oa_size >> AMDGPU_OA_SHIFT; 5671 5672 /* GDS Base */ 5673 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5674 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5675 WRITE_DATA_DST_SEL(0))); 5676 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5677 amdgpu_ring_write(ring, 0); 5678 amdgpu_ring_write(ring, gds_base); 5679 5680 /* GDS Size */ 5681 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5682 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5683 WRITE_DATA_DST_SEL(0))); 5684 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5685 amdgpu_ring_write(ring, 0); 5686 amdgpu_ring_write(ring, gds_size); 5687 5688 /* GWS */ 5689 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5690 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5691 WRITE_DATA_DST_SEL(0))); 5692 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5693 amdgpu_ring_write(ring, 0); 5694 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5695 5696 /* OA */ 5697 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5698 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5699 WRITE_DATA_DST_SEL(0))); 5700 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5701 amdgpu_ring_write(ring, 0); 5702 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5703 } 5704 5705 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5706 { 5707 WREG32(mmSQ_IND_INDEX, 5708 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5709 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5710 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5711 (SQ_IND_INDEX__FORCE_READ_MASK)); 5712 return RREG32(mmSQ_IND_DATA); 5713 } 5714 5715 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5716 uint32_t wave, uint32_t thread, 5717 uint32_t regno, uint32_t num, uint32_t *out) 5718 { 5719 WREG32(mmSQ_IND_INDEX, 5720 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5721 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5722 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5723 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5724 (SQ_IND_INDEX__FORCE_READ_MASK) | 5725 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5726 while (num--) 5727 *(out++) = RREG32(mmSQ_IND_DATA); 5728 } 5729 5730 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5731 { 5732 /* type 0 wave data */ 5733 dst[(*no_fields)++] = 0; 5734 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5735 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5736 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 5737 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5738 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5739 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5740 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5741 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5742 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5743 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5744 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5745 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5746 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5747 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5748 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5749 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5750 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5751 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5752 } 5753 5754 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5755 uint32_t wave, uint32_t start, 5756 uint32_t size, uint32_t *dst) 5757 { 5758 wave_read_regs( 5759 adev, simd, wave, 0, 5760 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5761 } 5762 5763 5764 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5765 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5766 .select_se_sh = &gfx_v8_0_select_se_sh, 5767 .read_wave_data = &gfx_v8_0_read_wave_data, 5768 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5769 }; 5770 5771 static int gfx_v8_0_early_init(void *handle) 5772 { 5773 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5774 5775 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5776 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS; 5777 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5778 gfx_v8_0_set_ring_funcs(adev); 5779 gfx_v8_0_set_irq_funcs(adev); 5780 gfx_v8_0_set_gds_init(adev); 5781 gfx_v8_0_set_rlc_funcs(adev); 5782 5783 return 0; 5784 } 5785 5786 static int gfx_v8_0_late_init(void *handle) 5787 { 5788 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5789 int r; 5790 5791 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5792 if (r) 5793 return r; 5794 5795 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5796 if (r) 5797 return r; 5798 5799 /* requires IBs so do in late init after IB pool is initialized */ 5800 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5801 if (r) 5802 return r; 5803 5804 amdgpu_set_powergating_state(adev, 5805 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); 5806 5807 return 0; 5808 } 5809 5810 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5811 bool enable) 5812 { 5813 if ((adev->asic_type == CHIP_POLARIS11) || 5814 (adev->asic_type == CHIP_POLARIS12)) 5815 /* Send msg to SMU via Powerplay */ 5816 amdgpu_set_powergating_state(adev, 5817 AMD_IP_BLOCK_TYPE_SMC, 5818 enable ? 5819 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); 5820 5821 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5822 } 5823 5824 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5825 bool enable) 5826 { 5827 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0); 5828 } 5829 5830 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5831 bool enable) 5832 { 5833 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5834 } 5835 5836 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5837 bool enable) 5838 { 5839 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5840 } 5841 5842 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5843 bool enable) 5844 { 5845 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5846 5847 /* Read any GFX register to wake up GFX. */ 5848 if (!enable) 5849 RREG32(mmDB_RENDER_CONTROL); 5850 } 5851 5852 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5853 bool enable) 5854 { 5855 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5856 cz_enable_gfx_cg_power_gating(adev, true); 5857 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5858 cz_enable_gfx_pipeline_power_gating(adev, true); 5859 } else { 5860 cz_enable_gfx_cg_power_gating(adev, false); 5861 cz_enable_gfx_pipeline_power_gating(adev, false); 5862 } 5863 } 5864 5865 static int gfx_v8_0_set_powergating_state(void *handle, 5866 enum amd_powergating_state state) 5867 { 5868 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5869 bool enable = (state == AMD_PG_STATE_GATE); 5870 5871 if (amdgpu_sriov_vf(adev)) 5872 return 0; 5873 5874 switch (adev->asic_type) { 5875 case CHIP_CARRIZO: 5876 case CHIP_STONEY: 5877 5878 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5879 cz_enable_sck_slow_down_on_power_up(adev, true); 5880 cz_enable_sck_slow_down_on_power_down(adev, true); 5881 } else { 5882 cz_enable_sck_slow_down_on_power_up(adev, false); 5883 cz_enable_sck_slow_down_on_power_down(adev, false); 5884 } 5885 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5886 cz_enable_cp_power_gating(adev, true); 5887 else 5888 cz_enable_cp_power_gating(adev, false); 5889 5890 cz_update_gfx_cg_power_gating(adev, enable); 5891 5892 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5893 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5894 else 5895 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5896 5897 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5898 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5899 else 5900 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5901 break; 5902 case CHIP_POLARIS11: 5903 case CHIP_POLARIS12: 5904 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5905 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5906 else 5907 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5908 5909 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5910 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5911 else 5912 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5913 5914 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) 5915 polaris11_enable_gfx_quick_mg_power_gating(adev, true); 5916 else 5917 polaris11_enable_gfx_quick_mg_power_gating(adev, false); 5918 break; 5919 default: 5920 break; 5921 } 5922 5923 return 0; 5924 } 5925 5926 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) 5927 { 5928 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5929 int data; 5930 5931 if (amdgpu_sriov_vf(adev)) 5932 *flags = 0; 5933 5934 /* AMD_CG_SUPPORT_GFX_MGCG */ 5935 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5936 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) 5937 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5938 5939 /* AMD_CG_SUPPORT_GFX_CGLG */ 5940 data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5941 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5942 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5943 5944 /* AMD_CG_SUPPORT_GFX_CGLS */ 5945 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5946 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5947 5948 /* AMD_CG_SUPPORT_GFX_CGTS */ 5949 data = RREG32(mmCGTS_SM_CTRL_REG); 5950 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK)) 5951 *flags |= AMD_CG_SUPPORT_GFX_CGTS; 5952 5953 /* AMD_CG_SUPPORT_GFX_CGTS_LS */ 5954 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK)) 5955 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS; 5956 5957 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5958 data = RREG32(mmRLC_MEM_SLP_CNTL); 5959 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5960 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5961 5962 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5963 data = RREG32(mmCP_MEM_SLP_CNTL); 5964 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5965 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5966 } 5967 5968 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5969 uint32_t reg_addr, uint32_t cmd) 5970 { 5971 uint32_t data; 5972 5973 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5974 5975 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5976 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5977 5978 data = RREG32(mmRLC_SERDES_WR_CTRL); 5979 if (adev->asic_type == CHIP_STONEY) 5980 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5981 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5982 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5983 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5984 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5985 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5986 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5987 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5988 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5989 else 5990 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5991 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5992 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5993 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5994 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5995 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5996 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5997 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5998 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5999 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 6000 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 6001 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 6002 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 6003 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 6004 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 6005 6006 WREG32(mmRLC_SERDES_WR_CTRL, data); 6007 } 6008 6009 #define MSG_ENTER_RLC_SAFE_MODE 1 6010 #define MSG_EXIT_RLC_SAFE_MODE 0 6011 #define RLC_GPR_REG2__REQ_MASK 0x00000001 6012 #define RLC_GPR_REG2__REQ__SHIFT 0 6013 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 6014 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 6015 6016 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 6017 { 6018 u32 data; 6019 unsigned i; 6020 6021 data = RREG32(mmRLC_CNTL); 6022 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 6023 return; 6024 6025 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 6026 data |= RLC_SAFE_MODE__CMD_MASK; 6027 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 6028 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 6029 WREG32(mmRLC_SAFE_MODE, data); 6030 6031 for (i = 0; i < adev->usec_timeout; i++) { 6032 if ((RREG32(mmRLC_GPM_STAT) & 6033 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 6034 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 6035 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 6036 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 6037 break; 6038 udelay(1); 6039 } 6040 6041 for (i = 0; i < adev->usec_timeout; i++) { 6042 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 6043 break; 6044 udelay(1); 6045 } 6046 adev->gfx.rlc.in_safe_mode = true; 6047 } 6048 } 6049 6050 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 6051 { 6052 u32 data = 0; 6053 unsigned i; 6054 6055 data = RREG32(mmRLC_CNTL); 6056 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 6057 return; 6058 6059 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 6060 if (adev->gfx.rlc.in_safe_mode) { 6061 data |= RLC_SAFE_MODE__CMD_MASK; 6062 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 6063 WREG32(mmRLC_SAFE_MODE, data); 6064 adev->gfx.rlc.in_safe_mode = false; 6065 } 6066 } 6067 6068 for (i = 0; i < adev->usec_timeout; i++) { 6069 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 6070 break; 6071 udelay(1); 6072 } 6073 } 6074 6075 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 6076 .enter_safe_mode = iceland_enter_rlc_safe_mode, 6077 .exit_safe_mode = iceland_exit_rlc_safe_mode 6078 }; 6079 6080 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 6081 bool enable) 6082 { 6083 uint32_t temp, data; 6084 6085 adev->gfx.rlc.funcs->enter_safe_mode(adev); 6086 6087 /* It is disabled by HW by default */ 6088 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 6089 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6090 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 6091 /* 1 - RLC memory Light sleep */ 6092 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 6093 6094 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 6095 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 6096 } 6097 6098 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 6099 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6100 if (adev->flags & AMD_IS_APU) 6101 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 6102 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 6103 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 6104 else 6105 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 6106 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 6107 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 6108 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 6109 6110 if (temp != data) 6111 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 6112 6113 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6114 gfx_v8_0_wait_for_rlc_serdes(adev); 6115 6116 /* 5 - clear mgcg override */ 6117 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 6118 6119 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 6120 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 6121 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 6122 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 6123 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 6124 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 6125 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 6126 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 6127 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 6128 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 6129 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 6130 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 6131 if (temp != data) 6132 WREG32(mmCGTS_SM_CTRL_REG, data); 6133 } 6134 udelay(50); 6135 6136 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6137 gfx_v8_0_wait_for_rlc_serdes(adev); 6138 } else { 6139 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 6140 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6141 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 6142 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 6143 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 6144 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 6145 if (temp != data) 6146 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 6147 6148 /* 2 - disable MGLS in RLC */ 6149 data = RREG32(mmRLC_MEM_SLP_CNTL); 6150 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 6151 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 6152 WREG32(mmRLC_MEM_SLP_CNTL, data); 6153 } 6154 6155 /* 3 - disable MGLS in CP */ 6156 data = RREG32(mmCP_MEM_SLP_CNTL); 6157 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 6158 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 6159 WREG32(mmCP_MEM_SLP_CNTL, data); 6160 } 6161 6162 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 6163 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 6164 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 6165 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 6166 if (temp != data) 6167 WREG32(mmCGTS_SM_CTRL_REG, data); 6168 6169 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6170 gfx_v8_0_wait_for_rlc_serdes(adev); 6171 6172 /* 6 - set mgcg override */ 6173 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 6174 6175 udelay(50); 6176 6177 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6178 gfx_v8_0_wait_for_rlc_serdes(adev); 6179 } 6180 6181 adev->gfx.rlc.funcs->exit_safe_mode(adev); 6182 } 6183 6184 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 6185 bool enable) 6186 { 6187 uint32_t temp, temp1, data, data1; 6188 6189 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 6190 6191 adev->gfx.rlc.funcs->enter_safe_mode(adev); 6192 6193 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 6194 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6195 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 6196 if (temp1 != data1) 6197 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6198 6199 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6200 gfx_v8_0_wait_for_rlc_serdes(adev); 6201 6202 /* 2 - clear cgcg override */ 6203 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 6204 6205 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6206 gfx_v8_0_wait_for_rlc_serdes(adev); 6207 6208 /* 3 - write cmd to set CGLS */ 6209 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 6210 6211 /* 4 - enable cgcg */ 6212 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 6213 6214 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6215 /* enable cgls*/ 6216 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 6217 6218 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6219 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 6220 6221 if (temp1 != data1) 6222 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6223 } else { 6224 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 6225 } 6226 6227 if (temp != data) 6228 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 6229 6230 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/ 6231 * Cmp_busy/GFX_Idle interrupts 6232 */ 6233 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 6234 } else { 6235 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 6236 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 6237 6238 /* TEST CGCG */ 6239 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6240 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 6241 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 6242 if (temp1 != data1) 6243 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6244 6245 /* read gfx register to wake up cgcg */ 6246 RREG32(mmCB_CGTT_SCLK_CTRL); 6247 RREG32(mmCB_CGTT_SCLK_CTRL); 6248 RREG32(mmCB_CGTT_SCLK_CTRL); 6249 RREG32(mmCB_CGTT_SCLK_CTRL); 6250 6251 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6252 gfx_v8_0_wait_for_rlc_serdes(adev); 6253 6254 /* write cmd to Set CGCG Overrride */ 6255 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 6256 6257 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6258 gfx_v8_0_wait_for_rlc_serdes(adev); 6259 6260 /* write cmd to Clear CGLS */ 6261 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 6262 6263 /* disable cgcg, cgls should be disabled too. */ 6264 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 6265 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 6266 if (temp != data) 6267 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 6268 } 6269 6270 gfx_v8_0_wait_for_rlc_serdes(adev); 6271 6272 adev->gfx.rlc.funcs->exit_safe_mode(adev); 6273 } 6274 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 6275 bool enable) 6276 { 6277 if (enable) { 6278 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 6279 * === MGCG + MGLS + TS(CG/LS) === 6280 */ 6281 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6282 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6283 } else { 6284 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 6285 * === CGCG + CGLS === 6286 */ 6287 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6288 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6289 } 6290 return 0; 6291 } 6292 6293 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 6294 enum amd_clockgating_state state) 6295 { 6296 uint32_t msg_id, pp_state = 0; 6297 uint32_t pp_support_state = 0; 6298 void *pp_handle = adev->powerplay.pp_handle; 6299 6300 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6301 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6302 pp_support_state = PP_STATE_SUPPORT_LS; 6303 pp_state = PP_STATE_LS; 6304 } 6305 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6306 pp_support_state |= PP_STATE_SUPPORT_CG; 6307 pp_state |= PP_STATE_CG; 6308 } 6309 if (state == AMD_CG_STATE_UNGATE) 6310 pp_state = 0; 6311 6312 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6313 PP_BLOCK_GFX_CG, 6314 pp_support_state, 6315 pp_state); 6316 amd_set_clockgating_by_smu(pp_handle, msg_id); 6317 } 6318 6319 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6320 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6321 pp_support_state = PP_STATE_SUPPORT_LS; 6322 pp_state = PP_STATE_LS; 6323 } 6324 6325 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6326 pp_support_state |= PP_STATE_SUPPORT_CG; 6327 pp_state |= PP_STATE_CG; 6328 } 6329 6330 if (state == AMD_CG_STATE_UNGATE) 6331 pp_state = 0; 6332 6333 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6334 PP_BLOCK_GFX_MG, 6335 pp_support_state, 6336 pp_state); 6337 amd_set_clockgating_by_smu(pp_handle, msg_id); 6338 } 6339 6340 return 0; 6341 } 6342 6343 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 6344 enum amd_clockgating_state state) 6345 { 6346 6347 uint32_t msg_id, pp_state = 0; 6348 uint32_t pp_support_state = 0; 6349 void *pp_handle = adev->powerplay.pp_handle; 6350 6351 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6352 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6353 pp_support_state = PP_STATE_SUPPORT_LS; 6354 pp_state = PP_STATE_LS; 6355 } 6356 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6357 pp_support_state |= PP_STATE_SUPPORT_CG; 6358 pp_state |= PP_STATE_CG; 6359 } 6360 if (state == AMD_CG_STATE_UNGATE) 6361 pp_state = 0; 6362 6363 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6364 PP_BLOCK_GFX_CG, 6365 pp_support_state, 6366 pp_state); 6367 amd_set_clockgating_by_smu(pp_handle, msg_id); 6368 } 6369 6370 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 6371 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 6372 pp_support_state = PP_STATE_SUPPORT_LS; 6373 pp_state = PP_STATE_LS; 6374 } 6375 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 6376 pp_support_state |= PP_STATE_SUPPORT_CG; 6377 pp_state |= PP_STATE_CG; 6378 } 6379 if (state == AMD_CG_STATE_UNGATE) 6380 pp_state = 0; 6381 6382 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6383 PP_BLOCK_GFX_3D, 6384 pp_support_state, 6385 pp_state); 6386 amd_set_clockgating_by_smu(pp_handle, msg_id); 6387 } 6388 6389 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6390 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6391 pp_support_state = PP_STATE_SUPPORT_LS; 6392 pp_state = PP_STATE_LS; 6393 } 6394 6395 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6396 pp_support_state |= PP_STATE_SUPPORT_CG; 6397 pp_state |= PP_STATE_CG; 6398 } 6399 6400 if (state == AMD_CG_STATE_UNGATE) 6401 pp_state = 0; 6402 6403 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6404 PP_BLOCK_GFX_MG, 6405 pp_support_state, 6406 pp_state); 6407 amd_set_clockgating_by_smu(pp_handle, msg_id); 6408 } 6409 6410 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 6411 pp_support_state = PP_STATE_SUPPORT_LS; 6412 6413 if (state == AMD_CG_STATE_UNGATE) 6414 pp_state = 0; 6415 else 6416 pp_state = PP_STATE_LS; 6417 6418 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6419 PP_BLOCK_GFX_RLC, 6420 pp_support_state, 6421 pp_state); 6422 amd_set_clockgating_by_smu(pp_handle, msg_id); 6423 } 6424 6425 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 6426 pp_support_state = PP_STATE_SUPPORT_LS; 6427 6428 if (state == AMD_CG_STATE_UNGATE) 6429 pp_state = 0; 6430 else 6431 pp_state = PP_STATE_LS; 6432 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6433 PP_BLOCK_GFX_CP, 6434 pp_support_state, 6435 pp_state); 6436 amd_set_clockgating_by_smu(pp_handle, msg_id); 6437 } 6438 6439 return 0; 6440 } 6441 6442 static int gfx_v8_0_set_clockgating_state(void *handle, 6443 enum amd_clockgating_state state) 6444 { 6445 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6446 6447 if (amdgpu_sriov_vf(adev)) 6448 return 0; 6449 6450 switch (adev->asic_type) { 6451 case CHIP_FIJI: 6452 case CHIP_CARRIZO: 6453 case CHIP_STONEY: 6454 gfx_v8_0_update_gfx_clock_gating(adev, 6455 state == AMD_CG_STATE_GATE); 6456 break; 6457 case CHIP_TONGA: 6458 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6459 break; 6460 case CHIP_POLARIS10: 6461 case CHIP_POLARIS11: 6462 case CHIP_POLARIS12: 6463 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6464 break; 6465 default: 6466 break; 6467 } 6468 return 0; 6469 } 6470 6471 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6472 { 6473 return ring->adev->wb.wb[ring->rptr_offs]; 6474 } 6475 6476 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6477 { 6478 struct amdgpu_device *adev = ring->adev; 6479 6480 if (ring->use_doorbell) 6481 /* XXX check if swapping is necessary on BE */ 6482 return ring->adev->wb.wb[ring->wptr_offs]; 6483 else 6484 return RREG32(mmCP_RB0_WPTR); 6485 } 6486 6487 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 6488 { 6489 struct amdgpu_device *adev = ring->adev; 6490 6491 if (ring->use_doorbell) { 6492 /* XXX check if swapping is necessary on BE */ 6493 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6494 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6495 } else { 6496 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 6497 (void)RREG32(mmCP_RB0_WPTR); 6498 } 6499 } 6500 6501 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 6502 { 6503 u32 ref_and_mask, reg_mem_engine; 6504 6505 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || 6506 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { 6507 switch (ring->me) { 6508 case 1: 6509 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6510 break; 6511 case 2: 6512 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6513 break; 6514 default: 6515 return; 6516 } 6517 reg_mem_engine = 0; 6518 } else { 6519 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6520 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6521 } 6522 6523 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6524 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6525 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6526 reg_mem_engine)); 6527 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 6528 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6529 amdgpu_ring_write(ring, ref_and_mask); 6530 amdgpu_ring_write(ring, ref_and_mask); 6531 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6532 } 6533 6534 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) 6535 { 6536 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6537 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) | 6538 EVENT_INDEX(4)); 6539 6540 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6541 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) | 6542 EVENT_INDEX(0)); 6543 } 6544 6545 6546 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) 6547 { 6548 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6549 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6550 WRITE_DATA_DST_SEL(0) | 6551 WR_CONFIRM)); 6552 amdgpu_ring_write(ring, mmHDP_DEBUG0); 6553 amdgpu_ring_write(ring, 0); 6554 amdgpu_ring_write(ring, 1); 6555 6556 } 6557 6558 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6559 struct amdgpu_ib *ib, 6560 unsigned vm_id, bool ctx_switch) 6561 { 6562 u32 header, control = 0; 6563 6564 if (ib->flags & AMDGPU_IB_FLAG_CE) 6565 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 6566 else 6567 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 6568 6569 control |= ib->length_dw | (vm_id << 24); 6570 6571 if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT) 6572 control |= INDIRECT_BUFFER_PRE_ENB(1); 6573 6574 amdgpu_ring_write(ring, header); 6575 amdgpu_ring_write(ring, 6576 #ifdef __BIG_ENDIAN 6577 (2 << 0) | 6578 #endif 6579 (ib->gpu_addr & 0xFFFFFFFC)); 6580 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6581 amdgpu_ring_write(ring, control); 6582 } 6583 6584 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 6585 struct amdgpu_ib *ib, 6586 unsigned vm_id, bool ctx_switch) 6587 { 6588 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); 6589 6590 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 6591 amdgpu_ring_write(ring, 6592 #ifdef __BIG_ENDIAN 6593 (2 << 0) | 6594 #endif 6595 (ib->gpu_addr & 0xFFFFFFFC)); 6596 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6597 amdgpu_ring_write(ring, control); 6598 } 6599 6600 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, 6601 u64 seq, unsigned flags) 6602 { 6603 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6604 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6605 6606 /* EVENT_WRITE_EOP - flush caches, send int */ 6607 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 6608 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6609 EOP_TC_ACTION_EN | 6610 EOP_TC_WB_ACTION_EN | 6611 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6612 EVENT_INDEX(5))); 6613 amdgpu_ring_write(ring, addr & 0xfffffffc); 6614 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 6615 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6616 amdgpu_ring_write(ring, lower_32_bits(seq)); 6617 amdgpu_ring_write(ring, upper_32_bits(seq)); 6618 6619 } 6620 6621 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 6622 { 6623 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6624 uint32_t seq = ring->fence_drv.sync_seq; 6625 uint64_t addr = ring->fence_drv.gpu_addr; 6626 6627 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6628 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ 6629 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 6630 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ 6631 amdgpu_ring_write(ring, addr & 0xfffffffc); 6632 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 6633 amdgpu_ring_write(ring, seq); 6634 amdgpu_ring_write(ring, 0xffffffff); 6635 amdgpu_ring_write(ring, 4); /* poll interval */ 6636 } 6637 6638 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 6639 unsigned vm_id, uint64_t pd_addr) 6640 { 6641 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6642 6643 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6644 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 6645 WRITE_DATA_DST_SEL(0)) | 6646 WR_CONFIRM); 6647 if (vm_id < 8) { 6648 amdgpu_ring_write(ring, 6649 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); 6650 } else { 6651 amdgpu_ring_write(ring, 6652 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); 6653 } 6654 amdgpu_ring_write(ring, 0); 6655 amdgpu_ring_write(ring, pd_addr >> 12); 6656 6657 /* bits 0-15 are the VM contexts0-15 */ 6658 /* invalidate the cache */ 6659 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6660 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6661 WRITE_DATA_DST_SEL(0))); 6662 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 6663 amdgpu_ring_write(ring, 0); 6664 amdgpu_ring_write(ring, 1 << vm_id); 6665 6666 /* wait for the invalidate to complete */ 6667 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6668 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ 6669 WAIT_REG_MEM_FUNCTION(0) | /* always */ 6670 WAIT_REG_MEM_ENGINE(0))); /* me */ 6671 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 6672 amdgpu_ring_write(ring, 0); 6673 amdgpu_ring_write(ring, 0); /* ref */ 6674 amdgpu_ring_write(ring, 0); /* mask */ 6675 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6676 6677 /* compute doesn't have PFP */ 6678 if (usepfp) { 6679 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 6680 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 6681 amdgpu_ring_write(ring, 0x0); 6682 } 6683 } 6684 6685 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 6686 { 6687 return ring->adev->wb.wb[ring->wptr_offs]; 6688 } 6689 6690 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 6691 { 6692 struct amdgpu_device *adev = ring->adev; 6693 6694 /* XXX check if swapping is necessary on BE */ 6695 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6696 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6697 } 6698 6699 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, 6700 u64 addr, u64 seq, 6701 unsigned flags) 6702 { 6703 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6704 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6705 6706 /* RELEASE_MEM - flush caches, send int */ 6707 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5)); 6708 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6709 EOP_TC_ACTION_EN | 6710 EOP_TC_WB_ACTION_EN | 6711 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6712 EVENT_INDEX(5))); 6713 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6714 amdgpu_ring_write(ring, addr & 0xfffffffc); 6715 amdgpu_ring_write(ring, upper_32_bits(addr)); 6716 amdgpu_ring_write(ring, lower_32_bits(seq)); 6717 amdgpu_ring_write(ring, upper_32_bits(seq)); 6718 } 6719 6720 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 6721 u64 seq, unsigned int flags) 6722 { 6723 /* we only allocate 32bit for each seq wb address */ 6724 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 6725 6726 /* write fence seq to the "addr" */ 6727 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6728 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6729 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 6730 amdgpu_ring_write(ring, lower_32_bits(addr)); 6731 amdgpu_ring_write(ring, upper_32_bits(addr)); 6732 amdgpu_ring_write(ring, lower_32_bits(seq)); 6733 6734 if (flags & AMDGPU_FENCE_FLAG_INT) { 6735 /* set register to trigger INT */ 6736 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6737 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6738 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 6739 amdgpu_ring_write(ring, mmCPC_INT_STATUS); 6740 amdgpu_ring_write(ring, 0); 6741 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 6742 } 6743 } 6744 6745 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) 6746 { 6747 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 6748 amdgpu_ring_write(ring, 0); 6749 } 6750 6751 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 6752 { 6753 uint32_t dw2 = 0; 6754 6755 if (amdgpu_sriov_vf(ring->adev)) 6756 gfx_v8_0_ring_emit_ce_meta_init(ring, 6757 (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr); 6758 6759 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 6760 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 6761 gfx_v8_0_ring_emit_vgt_flush(ring); 6762 /* set load_global_config & load_global_uconfig */ 6763 dw2 |= 0x8001; 6764 /* set load_cs_sh_regs */ 6765 dw2 |= 0x01000000; 6766 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 6767 dw2 |= 0x10002; 6768 6769 /* set load_ce_ram if preamble presented */ 6770 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 6771 dw2 |= 0x10000000; 6772 } else { 6773 /* still load_ce_ram if this is the first time preamble presented 6774 * although there is no context switch happens. 6775 */ 6776 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 6777 dw2 |= 0x10000000; 6778 } 6779 6780 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 6781 amdgpu_ring_write(ring, dw2); 6782 amdgpu_ring_write(ring, 0); 6783 6784 if (amdgpu_sriov_vf(ring->adev)) 6785 gfx_v8_0_ring_emit_de_meta_init(ring, 6786 (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr); 6787 } 6788 6789 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 6790 { 6791 unsigned ret; 6792 6793 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 6794 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 6795 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 6796 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 6797 ret = ring->wptr & ring->buf_mask; 6798 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 6799 return ret; 6800 } 6801 6802 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 6803 { 6804 unsigned cur; 6805 6806 BUG_ON(offset > ring->buf_mask); 6807 BUG_ON(ring->ring[offset] != 0x55aa55aa); 6808 6809 cur = (ring->wptr & ring->buf_mask) - 1; 6810 if (likely(cur > offset)) 6811 ring->ring[offset] = cur - offset; 6812 else 6813 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; 6814 } 6815 6816 6817 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 6818 { 6819 struct amdgpu_device *adev = ring->adev; 6820 6821 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6822 amdgpu_ring_write(ring, 0 | /* src: register*/ 6823 (5 << 8) | /* dst: memory */ 6824 (1 << 20)); /* write confirm */ 6825 amdgpu_ring_write(ring, reg); 6826 amdgpu_ring_write(ring, 0); 6827 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 6828 adev->virt.reg_val_offs * 4)); 6829 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 6830 adev->virt.reg_val_offs * 4)); 6831 } 6832 6833 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6834 uint32_t val) 6835 { 6836 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6837 amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */ 6838 amdgpu_ring_write(ring, reg); 6839 amdgpu_ring_write(ring, 0); 6840 amdgpu_ring_write(ring, val); 6841 } 6842 6843 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6844 enum amdgpu_interrupt_state state) 6845 { 6846 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE, 6847 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6848 } 6849 6850 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6851 int me, int pipe, 6852 enum amdgpu_interrupt_state state) 6853 { 6854 /* 6855 * amdgpu controls only pipe 0 of MEC1. That's why this function only 6856 * handles the setting of interrupts for this specific pipe. All other 6857 * pipes' interrupts are set by amdkfd. 6858 */ 6859 6860 if (me == 1) { 6861 switch (pipe) { 6862 case 0: 6863 break; 6864 default: 6865 DRM_DEBUG("invalid pipe %d\n", pipe); 6866 return; 6867 } 6868 } else { 6869 DRM_DEBUG("invalid me %d\n", me); 6870 return; 6871 } 6872 6873 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 6874 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6875 } 6876 6877 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6878 struct amdgpu_irq_src *source, 6879 unsigned type, 6880 enum amdgpu_interrupt_state state) 6881 { 6882 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, 6883 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6884 6885 return 0; 6886 } 6887 6888 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6889 struct amdgpu_irq_src *source, 6890 unsigned type, 6891 enum amdgpu_interrupt_state state) 6892 { 6893 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, 6894 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6895 6896 return 0; 6897 } 6898 6899 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6900 struct amdgpu_irq_src *src, 6901 unsigned type, 6902 enum amdgpu_interrupt_state state) 6903 { 6904 switch (type) { 6905 case AMDGPU_CP_IRQ_GFX_EOP: 6906 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state); 6907 break; 6908 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6909 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6910 break; 6911 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6912 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6913 break; 6914 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6915 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6916 break; 6917 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6918 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6919 break; 6920 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6921 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6922 break; 6923 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6924 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6925 break; 6926 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6927 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6928 break; 6929 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6930 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6931 break; 6932 default: 6933 break; 6934 } 6935 return 0; 6936 } 6937 6938 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, 6939 struct amdgpu_irq_src *source, 6940 struct amdgpu_iv_entry *entry) 6941 { 6942 int i; 6943 u8 me_id, pipe_id, queue_id; 6944 struct amdgpu_ring *ring; 6945 6946 DRM_DEBUG("IH: CP EOP\n"); 6947 me_id = (entry->ring_id & 0x0c) >> 2; 6948 pipe_id = (entry->ring_id & 0x03) >> 0; 6949 queue_id = (entry->ring_id & 0x70) >> 4; 6950 6951 switch (me_id) { 6952 case 0: 6953 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6954 break; 6955 case 1: 6956 case 2: 6957 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6958 ring = &adev->gfx.compute_ring[i]; 6959 /* Per-queue interrupt is supported for MEC starting from VI. 6960 * The interrupt can only be enabled/disabled per pipe instead of per queue. 6961 */ 6962 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6963 amdgpu_fence_process(ring); 6964 } 6965 break; 6966 } 6967 return 0; 6968 } 6969 6970 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, 6971 struct amdgpu_irq_src *source, 6972 struct amdgpu_iv_entry *entry) 6973 { 6974 DRM_ERROR("Illegal register access in command stream\n"); 6975 schedule_work(&adev->reset_work); 6976 return 0; 6977 } 6978 6979 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, 6980 struct amdgpu_irq_src *source, 6981 struct amdgpu_iv_entry *entry) 6982 { 6983 DRM_ERROR("Illegal instruction in command stream\n"); 6984 schedule_work(&adev->reset_work); 6985 return 0; 6986 } 6987 6988 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 6989 struct amdgpu_irq_src *src, 6990 unsigned int type, 6991 enum amdgpu_interrupt_state state) 6992 { 6993 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 6994 6995 BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ); 6996 6997 switch (type) { 6998 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 6999 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE, 7000 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 7001 if (ring->me == 1) 7002 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL, 7003 ring->pipe, 7004 GENERIC2_INT_ENABLE, 7005 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 7006 else 7007 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL, 7008 ring->pipe, 7009 GENERIC2_INT_ENABLE, 7010 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 7011 break; 7012 default: 7013 BUG(); /* kiq only support GENERIC2_INT now */ 7014 break; 7015 } 7016 return 0; 7017 } 7018 7019 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev, 7020 struct amdgpu_irq_src *source, 7021 struct amdgpu_iv_entry *entry) 7022 { 7023 u8 me_id, pipe_id, queue_id; 7024 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 7025 7026 BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ); 7027 7028 me_id = (entry->ring_id & 0x0c) >> 2; 7029 pipe_id = (entry->ring_id & 0x03) >> 0; 7030 queue_id = (entry->ring_id & 0x70) >> 4; 7031 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", 7032 me_id, pipe_id, queue_id); 7033 7034 amdgpu_fence_process(ring); 7035 return 0; 7036 } 7037 7038 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { 7039 .name = "gfx_v8_0", 7040 .early_init = gfx_v8_0_early_init, 7041 .late_init = gfx_v8_0_late_init, 7042 .sw_init = gfx_v8_0_sw_init, 7043 .sw_fini = gfx_v8_0_sw_fini, 7044 .hw_init = gfx_v8_0_hw_init, 7045 .hw_fini = gfx_v8_0_hw_fini, 7046 .suspend = gfx_v8_0_suspend, 7047 .resume = gfx_v8_0_resume, 7048 .is_idle = gfx_v8_0_is_idle, 7049 .wait_for_idle = gfx_v8_0_wait_for_idle, 7050 .check_soft_reset = gfx_v8_0_check_soft_reset, 7051 .pre_soft_reset = gfx_v8_0_pre_soft_reset, 7052 .soft_reset = gfx_v8_0_soft_reset, 7053 .post_soft_reset = gfx_v8_0_post_soft_reset, 7054 .set_clockgating_state = gfx_v8_0_set_clockgating_state, 7055 .set_powergating_state = gfx_v8_0_set_powergating_state, 7056 .get_clockgating_state = gfx_v8_0_get_clockgating_state, 7057 }; 7058 7059 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 7060 .type = AMDGPU_RING_TYPE_GFX, 7061 .align_mask = 0xff, 7062 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7063 .support_64bit_ptrs = false, 7064 .get_rptr = gfx_v8_0_ring_get_rptr, 7065 .get_wptr = gfx_v8_0_ring_get_wptr_gfx, 7066 .set_wptr = gfx_v8_0_ring_set_wptr_gfx, 7067 .emit_frame_size = /* maximum 215dw if count 16 IBs in */ 7068 5 + /* COND_EXEC */ 7069 7 + /* PIPELINE_SYNC */ 7070 19 + /* VM_FLUSH */ 7071 8 + /* FENCE for VM_FLUSH */ 7072 20 + /* GDS switch */ 7073 4 + /* double SWITCH_BUFFER, 7074 the first COND_EXEC jump to the place just 7075 prior to this double SWITCH_BUFFER */ 7076 5 + /* COND_EXEC */ 7077 7 + /* HDP_flush */ 7078 4 + /* VGT_flush */ 7079 14 + /* CE_META */ 7080 31 + /* DE_META */ 7081 3 + /* CNTX_CTRL */ 7082 5 + /* HDP_INVL */ 7083 8 + 8 + /* FENCE x2 */ 7084 2, /* SWITCH_BUFFER */ 7085 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ 7086 .emit_ib = gfx_v8_0_ring_emit_ib_gfx, 7087 .emit_fence = gfx_v8_0_ring_emit_fence_gfx, 7088 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 7089 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 7090 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 7091 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 7092 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, 7093 .test_ring = gfx_v8_0_ring_test_ring, 7094 .test_ib = gfx_v8_0_ring_test_ib, 7095 .insert_nop = amdgpu_ring_insert_nop, 7096 .pad_ib = amdgpu_ring_generic_pad_ib, 7097 .emit_switch_buffer = gfx_v8_ring_emit_sb, 7098 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, 7099 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, 7100 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, 7101 }; 7102 7103 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 7104 .type = AMDGPU_RING_TYPE_COMPUTE, 7105 .align_mask = 0xff, 7106 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7107 .support_64bit_ptrs = false, 7108 .get_rptr = gfx_v8_0_ring_get_rptr, 7109 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 7110 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 7111 .emit_frame_size = 7112 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7113 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 7114 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */ 7115 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 7116 17 + /* gfx_v8_0_ring_emit_vm_flush */ 7117 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ 7118 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 7119 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 7120 .emit_fence = gfx_v8_0_ring_emit_fence_compute, 7121 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 7122 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 7123 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 7124 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 7125 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, 7126 .test_ring = gfx_v8_0_ring_test_ring, 7127 .test_ib = gfx_v8_0_ring_test_ib, 7128 .insert_nop = amdgpu_ring_insert_nop, 7129 .pad_ib = amdgpu_ring_generic_pad_ib, 7130 }; 7131 7132 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { 7133 .type = AMDGPU_RING_TYPE_KIQ, 7134 .align_mask = 0xff, 7135 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7136 .support_64bit_ptrs = false, 7137 .get_rptr = gfx_v8_0_ring_get_rptr, 7138 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 7139 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 7140 .emit_frame_size = 7141 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7142 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 7143 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */ 7144 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 7145 17 + /* gfx_v8_0_ring_emit_vm_flush */ 7146 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7147 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 7148 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 7149 .emit_fence = gfx_v8_0_ring_emit_fence_kiq, 7150 .test_ring = gfx_v8_0_ring_test_ring, 7151 .test_ib = gfx_v8_0_ring_test_ib, 7152 .insert_nop = amdgpu_ring_insert_nop, 7153 .pad_ib = amdgpu_ring_generic_pad_ib, 7154 .emit_rreg = gfx_v8_0_ring_emit_rreg, 7155 .emit_wreg = gfx_v8_0_ring_emit_wreg, 7156 }; 7157 7158 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) 7159 { 7160 int i; 7161 7162 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq; 7163 7164 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7165 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx; 7166 7167 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7168 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute; 7169 } 7170 7171 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = { 7172 .set = gfx_v8_0_set_eop_interrupt_state, 7173 .process = gfx_v8_0_eop_irq, 7174 }; 7175 7176 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = { 7177 .set = gfx_v8_0_set_priv_reg_fault_state, 7178 .process = gfx_v8_0_priv_reg_irq, 7179 }; 7180 7181 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = { 7182 .set = gfx_v8_0_set_priv_inst_fault_state, 7183 .process = gfx_v8_0_priv_inst_irq, 7184 }; 7185 7186 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = { 7187 .set = gfx_v8_0_kiq_set_interrupt_state, 7188 .process = gfx_v8_0_kiq_irq, 7189 }; 7190 7191 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) 7192 { 7193 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7194 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs; 7195 7196 adev->gfx.priv_reg_irq.num_types = 1; 7197 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs; 7198 7199 adev->gfx.priv_inst_irq.num_types = 1; 7200 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; 7201 7202 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; 7203 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs; 7204 } 7205 7206 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) 7207 { 7208 adev->gfx.rlc.funcs = &iceland_rlc_funcs; 7209 } 7210 7211 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) 7212 { 7213 /* init asci gds info */ 7214 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); 7215 adev->gds.gws.total_size = 64; 7216 adev->gds.oa.total_size = 16; 7217 7218 if (adev->gds.mem.total_size == 64 * 1024) { 7219 adev->gds.mem.gfx_partition_size = 4096; 7220 adev->gds.mem.cs_partition_size = 4096; 7221 7222 adev->gds.gws.gfx_partition_size = 4; 7223 adev->gds.gws.cs_partition_size = 4; 7224 7225 adev->gds.oa.gfx_partition_size = 4; 7226 adev->gds.oa.cs_partition_size = 1; 7227 } else { 7228 adev->gds.mem.gfx_partition_size = 1024; 7229 adev->gds.mem.cs_partition_size = 1024; 7230 7231 adev->gds.gws.gfx_partition_size = 16; 7232 adev->gds.gws.cs_partition_size = 16; 7233 7234 adev->gds.oa.gfx_partition_size = 4; 7235 adev->gds.oa.cs_partition_size = 4; 7236 } 7237 } 7238 7239 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 7240 u32 bitmap) 7241 { 7242 u32 data; 7243 7244 if (!bitmap) 7245 return; 7246 7247 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7248 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7249 7250 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data); 7251 } 7252 7253 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) 7254 { 7255 u32 data, mask; 7256 7257 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | 7258 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); 7259 7260 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh); 7261 7262 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; 7263 } 7264 7265 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) 7266 { 7267 int i, j, k, counter, active_cu_number = 0; 7268 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 7269 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 7270 unsigned disable_masks[4 * 2]; 7271 7272 memset(cu_info, 0, sizeof(*cu_info)); 7273 7274 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); 7275 7276 mutex_lock(&adev->grbm_idx_mutex); 7277 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7278 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7279 mask = 1; 7280 ao_bitmap = 0; 7281 counter = 0; 7282 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 7283 if (i < 4 && j < 2) 7284 gfx_v8_0_set_user_cu_inactive_bitmap( 7285 adev, disable_masks[i * 2 + j]); 7286 bitmap = gfx_v8_0_get_cu_active_bitmap(adev); 7287 cu_info->bitmap[i][j] = bitmap; 7288 7289 for (k = 0; k < 16; k ++) { 7290 if (bitmap & mask) { 7291 if (counter < 2) 7292 ao_bitmap |= mask; 7293 counter ++; 7294 } 7295 mask <<= 1; 7296 } 7297 active_cu_number += counter; 7298 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 7299 } 7300 } 7301 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 7302 mutex_unlock(&adev->grbm_idx_mutex); 7303 7304 cu_info->number = active_cu_number; 7305 cu_info->ao_cu_mask = ao_cu_mask; 7306 } 7307 7308 const struct amdgpu_ip_block_version gfx_v8_0_ip_block = 7309 { 7310 .type = AMD_IP_BLOCK_TYPE_GFX, 7311 .major = 8, 7312 .minor = 0, 7313 .rev = 0, 7314 .funcs = &gfx_v8_0_ip_funcs, 7315 }; 7316 7317 const struct amdgpu_ip_block_version gfx_v8_1_ip_block = 7318 { 7319 .type = AMD_IP_BLOCK_TYPE_GFX, 7320 .major = 8, 7321 .minor = 1, 7322 .rev = 0, 7323 .funcs = &gfx_v8_0_ip_funcs, 7324 }; 7325 7326 static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) 7327 { 7328 uint64_t ce_payload_addr; 7329 int cnt_ce; 7330 static union { 7331 struct vi_ce_ib_state regular; 7332 struct vi_ce_ib_state_chained_ib chained; 7333 } ce_payload = {}; 7334 7335 if (ring->adev->virt.chained_ib_support) { 7336 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); 7337 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; 7338 } else { 7339 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload); 7340 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; 7341 } 7342 7343 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce)); 7344 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 7345 WRITE_DATA_DST_SEL(8) | 7346 WR_CONFIRM) | 7347 WRITE_DATA_CACHE_POLICY(0)); 7348 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr)); 7349 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr)); 7350 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2); 7351 } 7352 7353 static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) 7354 { 7355 uint64_t de_payload_addr, gds_addr; 7356 int cnt_de; 7357 static union { 7358 struct vi_de_ib_state regular; 7359 struct vi_de_ib_state_chained_ib chained; 7360 } de_payload = {}; 7361 7362 gds_addr = csa_addr + 4096; 7363 if (ring->adev->virt.chained_ib_support) { 7364 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); 7365 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr); 7366 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload); 7367 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2; 7368 } else { 7369 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr); 7370 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr); 7371 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload); 7372 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2; 7373 } 7374 7375 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de)); 7376 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 7377 WRITE_DATA_DST_SEL(8) | 7378 WR_CONFIRM) | 7379 WRITE_DATA_CACHE_POLICY(0)); 7380 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr)); 7381 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); 7382 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2); 7383 } 7384 7385 /* create MQD for each compute queue */ 7386 static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev) 7387 { 7388 struct amdgpu_ring *ring = NULL; 7389 int r, i; 7390 7391 /* create MQD for KIQ */ 7392 ring = &adev->gfx.kiq.ring; 7393 if (!ring->mqd_obj) { 7394 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, 7395 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, 7396 &ring->mqd_gpu_addr, &ring->mqd_ptr); 7397 if (r) { 7398 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); 7399 return r; 7400 } 7401 7402 /* prepare MQD backup */ 7403 adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); 7404 if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) 7405 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); 7406 } 7407 7408 /* create MQD for each KCQ */ 7409 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 7410 ring = &adev->gfx.compute_ring[i]; 7411 if (!ring->mqd_obj) { 7412 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, 7413 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, 7414 &ring->mqd_gpu_addr, &ring->mqd_ptr); 7415 if (r) { 7416 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); 7417 return r; 7418 } 7419 7420 /* prepare MQD backup */ 7421 adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); 7422 if (!adev->gfx.mec.mqd_backup[i]) 7423 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); 7424 } 7425 } 7426 7427 return 0; 7428 } 7429 7430 static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev) 7431 { 7432 struct amdgpu_ring *ring = NULL; 7433 int i; 7434 7435 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 7436 ring = &adev->gfx.compute_ring[i]; 7437 kfree(adev->gfx.mec.mqd_backup[i]); 7438 amdgpu_bo_free_kernel(&ring->mqd_obj, 7439 &ring->mqd_gpu_addr, 7440 &ring->mqd_ptr); 7441 } 7442 7443 ring = &adev->gfx.kiq.ring; 7444 kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); 7445 amdgpu_bo_free_kernel(&ring->mqd_obj, 7446 &ring->mqd_gpu_addr, 7447 &ring->mqd_ptr); 7448 } 7449