1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 */ 23 #include <linux/firmware.h> 24 #include "drmP.h" 25 #include "amdgpu.h" 26 #include "amdgpu_gfx.h" 27 #include "vi.h" 28 #include "vid.h" 29 #include "amdgpu_ucode.h" 30 #include "clearstate_vi.h" 31 32 #include "gmc/gmc_8_2_d.h" 33 #include "gmc/gmc_8_2_sh_mask.h" 34 35 #include "oss/oss_3_0_d.h" 36 #include "oss/oss_3_0_sh_mask.h" 37 38 #include "bif/bif_5_0_d.h" 39 #include "bif/bif_5_0_sh_mask.h" 40 41 #include "gca/gfx_8_0_d.h" 42 #include "gca/gfx_8_0_enum.h" 43 #include "gca/gfx_8_0_sh_mask.h" 44 #include "gca/gfx_8_0_enum.h" 45 46 #include "uvd/uvd_5_0_d.h" 47 #include "uvd/uvd_5_0_sh_mask.h" 48 49 #include "dce/dce_10_0_d.h" 50 #include "dce/dce_10_0_sh_mask.h" 51 52 #define GFX8_NUM_GFX_RINGS 1 53 #define GFX8_NUM_COMPUTE_RINGS 8 54 55 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 56 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 57 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 58 59 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) 60 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT) 61 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT) 62 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT) 63 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT) 64 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT) 65 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT) 66 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT) 67 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT) 68 69 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L 70 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L 71 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L 72 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L 73 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L 74 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L 75 76 /* BPM SERDES CMD */ 77 #define SET_BPM_SERDES_CMD 1 78 #define 
CLE_BPM_SERDES_CMD 0 79 80 /* BPM Register Address*/ 81 enum { 82 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */ 83 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */ 84 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */ 85 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */ 86 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */ 87 BPM_REG_FGCG_MAX 88 }; 89 90 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin"); 91 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin"); 92 MODULE_FIRMWARE("amdgpu/carrizo_me.bin"); 93 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin"); 94 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin"); 95 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin"); 96 97 MODULE_FIRMWARE("amdgpu/stoney_ce.bin"); 98 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin"); 99 MODULE_FIRMWARE("amdgpu/stoney_me.bin"); 100 MODULE_FIRMWARE("amdgpu/stoney_mec.bin"); 101 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin"); 102 103 MODULE_FIRMWARE("amdgpu/tonga_ce.bin"); 104 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin"); 105 MODULE_FIRMWARE("amdgpu/tonga_me.bin"); 106 MODULE_FIRMWARE("amdgpu/tonga_mec.bin"); 107 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin"); 108 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin"); 109 110 MODULE_FIRMWARE("amdgpu/topaz_ce.bin"); 111 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin"); 112 MODULE_FIRMWARE("amdgpu/topaz_me.bin"); 113 MODULE_FIRMWARE("amdgpu/topaz_mec.bin"); 114 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin"); 115 116 MODULE_FIRMWARE("amdgpu/fiji_ce.bin"); 117 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin"); 118 MODULE_FIRMWARE("amdgpu/fiji_me.bin"); 119 MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); 120 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); 121 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); 122 123 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = 124 { 125 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, 126 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1}, 127 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2}, 128 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, 
mmGDS_GWS_VMID3, mmGDS_OA_VMID3}, 129 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4}, 130 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5}, 131 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6}, 132 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7}, 133 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8}, 134 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9}, 135 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10}, 136 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11}, 137 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12}, 138 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13}, 139 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14}, 140 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15} 141 }; 142 143 static const u32 golden_settings_tonga_a11[] = 144 { 145 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208, 146 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 147 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 148 mmGB_GPU_ID, 0x0000000f, 0x00000000, 149 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 150 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc, 151 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 152 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 153 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 154 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 155 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 156 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb, 157 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b, 158 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876, 159 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 160 }; 161 162 static const u32 tonga_golden_common_all[] = 163 { 164 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 165 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 166 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 167 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 168 
mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 169 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 170 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 171 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 172 }; 173 174 static const u32 tonga_mgcg_cgcg_init[] = 175 { 176 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 177 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 178 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 179 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 180 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 181 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 182 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 183 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 184 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 185 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 186 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 187 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 188 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 189 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 190 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 191 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 192 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 193 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 194 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 195 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 196 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 197 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 198 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 199 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 200 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 201 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 202 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 203 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 204 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 205 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 206 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 207 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 208 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 209 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 210 mmCGTS_CU0_SP1_CTRL_REG, 
0xffffffff, 0x00060005, 211 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 212 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 213 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 214 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 215 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 216 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 217 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 218 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 219 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 220 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 221 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 222 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 223 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 224 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 225 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 226 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 227 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 228 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 229 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 230 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 231 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 232 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 233 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 234 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 235 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 236 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 237 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 238 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 239 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 240 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 241 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 242 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 243 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 244 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 245 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 246 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 247 mmCGTS_SM_CTRL_REG, 
0xffffffff, 0x96e00200, 248 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 249 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 250 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 251 }; 252 253 static const u32 fiji_golden_common_all[] = 254 { 255 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 256 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a, 257 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e, 258 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 259 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 260 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 261 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 262 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 263 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 264 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009, 265 }; 266 267 static const u32 golden_settings_fiji_a10[] = 268 { 269 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 270 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 271 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 272 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 273 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 274 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 275 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 276 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 277 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 278 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff, 279 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 280 }; 281 282 static const u32 fiji_mgcg_cgcg_init[] = 283 { 284 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 285 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 286 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 287 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 288 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 289 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 290 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 291 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 292 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 293 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 294 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 295 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 296 
mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 297 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 298 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 299 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 300 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 301 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 302 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 303 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 304 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 305 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 306 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 307 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 308 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 309 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 310 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 311 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 312 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 313 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 314 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 315 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 316 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 317 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 318 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 319 }; 320 321 static const u32 golden_settings_iceland_a11[] = 322 { 323 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 324 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 325 mmDB_DEBUG3, 0xc0000000, 0xc0000000, 326 mmGB_GPU_ID, 0x0000000f, 0x00000000, 327 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 328 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 329 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002, 330 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 331 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 332 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 333 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 334 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 335 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1, 336 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 337 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010, 338 }; 339 340 static const u32 iceland_golden_common_all[] = 341 { 342 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 343 
mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 344 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 345 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 346 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 347 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 348 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 349 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 350 }; 351 352 static const u32 iceland_mgcg_cgcg_init[] = 353 { 354 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 355 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 356 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 357 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 358 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100, 359 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100, 360 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100, 361 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 362 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 363 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 364 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 365 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 366 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 367 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 368 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 369 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 370 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 371 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 372 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 373 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 374 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 375 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 376 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100, 377 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 378 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 379 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 380 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 381 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 382 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 383 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 384 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 385 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 
0x00010000, 386 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 387 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 388 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 389 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 390 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 391 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 392 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 393 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 394 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 395 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 396 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 397 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 398 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 399 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 400 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 401 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 402 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 403 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 404 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 405 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 406 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 407 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 408 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 409 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 410 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 411 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 412 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 413 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 414 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 415 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 416 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 417 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 418 }; 419 420 static const u32 cz_golden_settings_a11[] = 421 { 422 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 423 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 424 mmGB_GPU_ID, 0x0000000f, 0x00000000, 425 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001, 426 
mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 427 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 428 mmTA_CNTL_AUX, 0x000f000f, 0x00010000, 429 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 430 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3, 431 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302 432 }; 433 434 static const u32 cz_golden_common_all[] = 435 { 436 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 437 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 438 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 439 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 440 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 441 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 442 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 443 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 444 }; 445 446 static const u32 cz_mgcg_cgcg_init[] = 447 { 448 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 449 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 450 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 451 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 452 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 453 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 454 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100, 455 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 456 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 457 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 458 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 459 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 460 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 461 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 462 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 463 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 464 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 465 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 466 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 467 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 468 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 469 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 470 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 471 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 
0x00000100, 472 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 473 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 474 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 475 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 476 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 477 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 478 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 479 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 480 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 481 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 482 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 483 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 484 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 485 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 486 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 487 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 488 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 489 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 490 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 491 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 492 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 493 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 494 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 495 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 496 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 497 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 498 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 499 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 500 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 501 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 502 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 503 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 504 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 505 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 506 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 507 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 508 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 509 
mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 510 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 511 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 512 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 513 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 514 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 515 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 516 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 517 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 518 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 519 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 520 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 521 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 522 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 523 }; 524 525 static const u32 stoney_golden_settings_a11[] = 526 { 527 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 528 mmGB_GPU_ID, 0x0000000f, 0x00000000, 529 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 530 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 531 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 532 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 533 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 534 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 535 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1, 536 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010, 537 }; 538 539 static const u32 stoney_golden_common_all[] = 540 { 541 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 542 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000, 543 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 544 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001, 545 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 546 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 547 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 548 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 549 }; 550 551 static const u32 stoney_mgcg_cgcg_init[] = 552 { 553 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 554 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 555 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 556 mmRLC_MEM_SLP_CNTL, 
0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);

/**
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 * @adev: amdgpu device pointer
 *
 * Applies the clockgating-init, golden-settings and golden-common register
 * sequences (reg, and-mask, or-value triplets defined above) that match
 * adev->asic_type.  Unknown ASIC types are silently left untouched.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

/**
 * gfx_v8_0_scratch_init - set up the CP scratch register pool
 * @adev: amdgpu device pointer
 *
 * Exposes 7 scratch registers starting at mmSCRATCH_REG0 and marks them
 * all free; they are later handed out by amdgpu_gfx_scratch_get().
 */
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
		adev->gfx.scratch.free[i] = true;
		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
	}
}

/**
 * gfx_v8_0_ring_test_ring - basic CP ring sanity test
 * @ring: ring to test
 *
 * Seeds a scratch register with 0xCAFEDEAD, emits a SET_UCONFIG_REG packet
 * that writes 0xDEADBEEF to it, then polls (up to adev->usec_timeout usecs)
 * until the value lands.  Returns 0 on success, negative errno on failure.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Known sentinel so we can tell whether the CP overwrote it. */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* 3-dword packet: write 0xDEADBEEF into the scratch register. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* Busy-poll for the CP to execute the packet. */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = 
-EINVAL; 680 } 681 amdgpu_gfx_scratch_free(adev, scratch); 682 return r; 683 } 684 685 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) 686 { 687 struct amdgpu_device *adev = ring->adev; 688 struct amdgpu_ib ib; 689 struct fence *f = NULL; 690 uint32_t scratch; 691 uint32_t tmp = 0; 692 unsigned i; 693 int r; 694 695 r = amdgpu_gfx_scratch_get(adev, &scratch); 696 if (r) { 697 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r); 698 return r; 699 } 700 WREG32(scratch, 0xCAFEDEAD); 701 memset(&ib, 0, sizeof(ib)); 702 r = amdgpu_ib_get(adev, NULL, 256, &ib); 703 if (r) { 704 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 705 goto err1; 706 } 707 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 708 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); 709 ib.ptr[2] = 0xDEADBEEF; 710 ib.length_dw = 3; 711 712 r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, 713 NULL, &f); 714 if (r) 715 goto err2; 716 717 r = fence_wait(f, false); 718 if (r) { 719 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 720 goto err2; 721 } 722 for (i = 0; i < adev->usec_timeout; i++) { 723 tmp = RREG32(scratch); 724 if (tmp == 0xDEADBEEF) 725 break; 726 DRM_UDELAY(1); 727 } 728 if (i < adev->usec_timeout) { 729 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", 730 ring->idx, i); 731 goto err2; 732 } else { 733 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", 734 scratch, tmp); 735 r = -EINVAL; 736 } 737 err2: 738 fence_put(f); 739 amdgpu_ib_free(adev, &ib); 740 err1: 741 amdgpu_gfx_scratch_free(adev, scratch); 742 return r; 743 } 744 745 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) 746 { 747 const char *chip_name; 748 char fw_name[30]; 749 int err; 750 struct amdgpu_firmware_info *info = NULL; 751 const struct common_firmware_header *header = NULL; 752 const struct gfx_firmware_header_v1_0 *cp_hdr; 753 754 DRM_DEBUG("\n"); 755 756 switch (adev->asic_type) { 757 case CHIP_TOPAZ: 758 chip_name = "topaz"; 759 break; 
760 case CHIP_TONGA: 761 chip_name = "tonga"; 762 break; 763 case CHIP_CARRIZO: 764 chip_name = "carrizo"; 765 break; 766 case CHIP_FIJI: 767 chip_name = "fiji"; 768 break; 769 case CHIP_STONEY: 770 chip_name = "stoney"; 771 break; 772 default: 773 BUG(); 774 } 775 776 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 777 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 778 if (err) 779 goto out; 780 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 781 if (err) 782 goto out; 783 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 784 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 785 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 786 787 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 788 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 789 if (err) 790 goto out; 791 err = amdgpu_ucode_validate(adev->gfx.me_fw); 792 if (err) 793 goto out; 794 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 795 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 796 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 797 798 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 799 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 800 if (err) 801 goto out; 802 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 803 if (err) 804 goto out; 805 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 806 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 807 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 808 809 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 810 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 811 if (err) 812 goto out; 813 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 814 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data; 815 
adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 816 adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 817 818 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 819 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 820 if (err) 821 goto out; 822 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 823 if (err) 824 goto out; 825 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 826 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 827 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 828 829 if ((adev->asic_type != CHIP_STONEY) && 830 (adev->asic_type != CHIP_TOPAZ)) { 831 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 832 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 833 if (!err) { 834 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 835 if (err) 836 goto out; 837 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 838 adev->gfx.mec2_fw->data; 839 adev->gfx.mec2_fw_version = 840 le32_to_cpu(cp_hdr->header.ucode_version); 841 adev->gfx.mec2_feature_version = 842 le32_to_cpu(cp_hdr->ucode_feature_version); 843 } else { 844 err = 0; 845 adev->gfx.mec2_fw = NULL; 846 } 847 } 848 849 if (adev->firmware.smu_load) { 850 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 851 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 852 info->fw = adev->gfx.pfp_fw; 853 header = (const struct common_firmware_header *)info->fw->data; 854 adev->firmware.fw_size += 855 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 856 857 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 858 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 859 info->fw = adev->gfx.me_fw; 860 header = (const struct common_firmware_header *)info->fw->data; 861 adev->firmware.fw_size += 862 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 863 864 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 865 info->ucode_id = 
AMDGPU_UCODE_ID_CP_CE; 866 info->fw = adev->gfx.ce_fw; 867 header = (const struct common_firmware_header *)info->fw->data; 868 adev->firmware.fw_size += 869 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 870 871 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 872 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 873 info->fw = adev->gfx.rlc_fw; 874 header = (const struct common_firmware_header *)info->fw->data; 875 adev->firmware.fw_size += 876 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 877 878 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 879 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 880 info->fw = adev->gfx.mec_fw; 881 header = (const struct common_firmware_header *)info->fw->data; 882 adev->firmware.fw_size += 883 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 884 885 if (adev->gfx.mec2_fw) { 886 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 887 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 888 info->fw = adev->gfx.mec2_fw; 889 header = (const struct common_firmware_header *)info->fw->data; 890 adev->firmware.fw_size += 891 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 892 } 893 894 } 895 896 out: 897 if (err) { 898 dev_err(adev->dev, 899 "gfx8: Failed to load firmware \"%s\"\n", 900 fw_name); 901 release_firmware(adev->gfx.pfp_fw); 902 adev->gfx.pfp_fw = NULL; 903 release_firmware(adev->gfx.me_fw); 904 adev->gfx.me_fw = NULL; 905 release_firmware(adev->gfx.ce_fw); 906 adev->gfx.ce_fw = NULL; 907 release_firmware(adev->gfx.rlc_fw); 908 adev->gfx.rlc_fw = NULL; 909 release_firmware(adev->gfx.mec_fw); 910 adev->gfx.mec_fw = NULL; 911 release_firmware(adev->gfx.mec2_fw); 912 adev->gfx.mec2_fw = NULL; 913 } 914 return err; 915 } 916 917 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) 918 { 919 int r; 920 921 if (adev->gfx.mec.hpd_eop_obj) { 922 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); 923 if (unlikely(r != 0)) 924 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); 925 
amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); 926 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 927 928 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); 929 adev->gfx.mec.hpd_eop_obj = NULL; 930 } 931 } 932 933 #define MEC_HPD_SIZE 2048 934 935 static int gfx_v8_0_mec_init(struct amdgpu_device *adev) 936 { 937 int r; 938 u32 *hpd; 939 940 /* 941 * we assign only 1 pipe because all other pipes will 942 * be handled by KFD 943 */ 944 adev->gfx.mec.num_mec = 1; 945 adev->gfx.mec.num_pipe = 1; 946 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; 947 948 if (adev->gfx.mec.hpd_eop_obj == NULL) { 949 r = amdgpu_bo_create(adev, 950 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, 951 PAGE_SIZE, true, 952 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, 953 &adev->gfx.mec.hpd_eop_obj); 954 if (r) { 955 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 956 return r; 957 } 958 } 959 960 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); 961 if (unlikely(r != 0)) { 962 gfx_v8_0_mec_fini(adev); 963 return r; 964 } 965 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT, 966 &adev->gfx.mec.hpd_eop_gpu_addr); 967 if (r) { 968 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r); 969 gfx_v8_0_mec_fini(adev); 970 return r; 971 } 972 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); 973 if (r) { 974 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); 975 gfx_v8_0_mec_fini(adev); 976 return r; 977 } 978 979 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2); 980 981 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 982 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 983 984 return 0; 985 } 986 987 static const u32 vgpr_init_compute_shader[] = 988 { 989 0x7e000209, 0x7e020208, 990 0x7e040207, 0x7e060206, 991 0x7e080205, 0x7e0a0204, 992 0x7e0c0203, 0x7e0e0202, 993 0x7e100201, 0x7e120200, 994 0x7e140209, 0x7e160208, 995 0x7e180207, 0x7e1a0206, 996 0x7e1c0205, 0x7e1e0204, 997 
0x7e200203, 0x7e220202, 998 0x7e240201, 0x7e260200, 999 0x7e280209, 0x7e2a0208, 1000 0x7e2c0207, 0x7e2e0206, 1001 0x7e300205, 0x7e320204, 1002 0x7e340203, 0x7e360202, 1003 0x7e380201, 0x7e3a0200, 1004 0x7e3c0209, 0x7e3e0208, 1005 0x7e400207, 0x7e420206, 1006 0x7e440205, 0x7e460204, 1007 0x7e480203, 0x7e4a0202, 1008 0x7e4c0201, 0x7e4e0200, 1009 0x7e500209, 0x7e520208, 1010 0x7e540207, 0x7e560206, 1011 0x7e580205, 0x7e5a0204, 1012 0x7e5c0203, 0x7e5e0202, 1013 0x7e600201, 0x7e620200, 1014 0x7e640209, 0x7e660208, 1015 0x7e680207, 0x7e6a0206, 1016 0x7e6c0205, 0x7e6e0204, 1017 0x7e700203, 0x7e720202, 1018 0x7e740201, 0x7e760200, 1019 0x7e780209, 0x7e7a0208, 1020 0x7e7c0207, 0x7e7e0206, 1021 0xbf8a0000, 0xbf810000, 1022 }; 1023 1024 static const u32 sgpr_init_compute_shader[] = 1025 { 1026 0xbe8a0100, 0xbe8c0102, 1027 0xbe8e0104, 0xbe900106, 1028 0xbe920108, 0xbe940100, 1029 0xbe960102, 0xbe980104, 1030 0xbe9a0106, 0xbe9c0108, 1031 0xbe9e0100, 0xbea00102, 1032 0xbea20104, 0xbea40106, 1033 0xbea60108, 0xbea80100, 1034 0xbeaa0102, 0xbeac0104, 1035 0xbeae0106, 0xbeb00108, 1036 0xbeb20100, 0xbeb40102, 1037 0xbeb60104, 0xbeb80106, 1038 0xbeba0108, 0xbebc0100, 1039 0xbebe0102, 0xbec00104, 1040 0xbec20106, 0xbec40108, 1041 0xbec60100, 0xbec80102, 1042 0xbee60004, 0xbee70005, 1043 0xbeea0006, 0xbeeb0007, 1044 0xbee80008, 0xbee90009, 1045 0xbefc0000, 0xbf8a0000, 1046 0xbf810000, 0x00000000, 1047 }; 1048 1049 static const u32 vgpr_init_regs[] = 1050 { 1051 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff, 1052 mmCOMPUTE_RESOURCE_LIMITS, 0, 1053 mmCOMPUTE_NUM_THREAD_X, 256*4, 1054 mmCOMPUTE_NUM_THREAD_Y, 1, 1055 mmCOMPUTE_NUM_THREAD_Z, 1, 1056 mmCOMPUTE_PGM_RSRC2, 20, 1057 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1058 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1059 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1060 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1061 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1062 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1063 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1064 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1065 
mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1066 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1067 }; 1068 1069 static const u32 sgpr1_init_regs[] = 1070 { 1071 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f, 1072 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, 1073 mmCOMPUTE_NUM_THREAD_X, 256*5, 1074 mmCOMPUTE_NUM_THREAD_Y, 1, 1075 mmCOMPUTE_NUM_THREAD_Z, 1, 1076 mmCOMPUTE_PGM_RSRC2, 20, 1077 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1078 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1079 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1080 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1081 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1082 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1083 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1084 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1085 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1086 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1087 }; 1088 1089 static const u32 sgpr2_init_regs[] = 1090 { 1091 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0, 1092 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, 1093 mmCOMPUTE_NUM_THREAD_X, 256*5, 1094 mmCOMPUTE_NUM_THREAD_Y, 1, 1095 mmCOMPUTE_NUM_THREAD_Z, 1, 1096 mmCOMPUTE_PGM_RSRC2, 20, 1097 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1098 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1099 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1100 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1101 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1102 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1103 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1104 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1105 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1106 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1107 }; 1108 1109 static const u32 sec_ded_counter_registers[] = 1110 { 1111 mmCPC_EDC_ATC_CNT, 1112 mmCPC_EDC_SCRATCH_CNT, 1113 mmCPC_EDC_UCODE_CNT, 1114 mmCPF_EDC_ATC_CNT, 1115 mmCPF_EDC_ROQ_CNT, 1116 mmCPF_EDC_TAG_CNT, 1117 mmCPG_EDC_ATC_CNT, 1118 mmCPG_EDC_DMA_CNT, 1119 mmCPG_EDC_TAG_CNT, 1120 mmDC_EDC_CSINVOC_CNT, 1121 mmDC_EDC_RESTORE_CNT, 1122 mmDC_EDC_STATE_CNT, 1123 mmGDS_EDC_CNT, 1124 mmGDS_EDC_GRBM_CNT, 1125 mmGDS_EDC_OA_DED, 1126 mmSPI_EDC_CNT, 1127 mmSQC_ATC_EDC_GATCL1_CNT, 1128 mmSQC_EDC_CNT, 1129 mmSQ_EDC_DED_CNT, 1130 mmSQ_EDC_INFO, 1131 
mmSQ_EDC_SEC_CNT, 1132 mmTCC_EDC_CNT, 1133 mmTCP_ATC_EDC_GATCL1_CNT, 1134 mmTCP_EDC_CNT, 1135 mmTD_EDC_CNT 1136 }; 1137 1138 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 1139 { 1140 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 1141 struct amdgpu_ib ib; 1142 struct fence *f = NULL; 1143 int r, i; 1144 u32 tmp; 1145 unsigned total_size, vgpr_offset, sgpr_offset; 1146 u64 gpu_addr; 1147 1148 /* only supported on CZ */ 1149 if (adev->asic_type != CHIP_CARRIZO) 1150 return 0; 1151 1152 /* bail if the compute ring is not ready */ 1153 if (!ring->ready) 1154 return 0; 1155 1156 tmp = RREG32(mmGB_EDC_MODE); 1157 WREG32(mmGB_EDC_MODE, 0); 1158 1159 total_size = 1160 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1161 total_size += 1162 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1163 total_size += 1164 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1165 total_size = ALIGN(total_size, 256); 1166 vgpr_offset = total_size; 1167 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 1168 sgpr_offset = total_size; 1169 total_size += sizeof(sgpr_init_compute_shader); 1170 1171 /* allocate an indirect buffer to put the commands in */ 1172 memset(&ib, 0, sizeof(ib)); 1173 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 1174 if (r) { 1175 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 1176 return r; 1177 } 1178 1179 /* load the compute shaders */ 1180 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 1181 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 1182 1183 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 1184 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 1185 1186 /* init the ib length to 0 */ 1187 ib.length_dw = 0; 1188 1189 /* VGPR */ 1190 /* write the register state for the compute dispatch */ 1191 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) { 1192 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1193 ib.ptr[ib.length_dw++] = 
vgpr_init_regs[i] - PACKET3_SET_SH_REG_START; 1194 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1]; 1195 } 1196 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1197 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 1198 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1199 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1200 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1201 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1202 1203 /* write dispatch packet */ 1204 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1205 ib.ptr[ib.length_dw++] = 8; /* x */ 1206 ib.ptr[ib.length_dw++] = 1; /* y */ 1207 ib.ptr[ib.length_dw++] = 1; /* z */ 1208 ib.ptr[ib.length_dw++] = 1209 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1210 1211 /* write CS partial flush packet */ 1212 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1213 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1214 1215 /* SGPR1 */ 1216 /* write the register state for the compute dispatch */ 1217 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) { 1218 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1219 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START; 1220 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1]; 1221 } 1222 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1223 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1224 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1225 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1226 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1227 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1228 1229 /* write dispatch packet */ 1230 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1231 ib.ptr[ib.length_dw++] = 8; /* x */ 1232 ib.ptr[ib.length_dw++] = 1; /* y */ 1233 ib.ptr[ib.length_dw++] = 1; /* z */ 1234 ib.ptr[ib.length_dw++] = 1235 REG_SET_FIELD(0, 
COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1236 1237 /* write CS partial flush packet */ 1238 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1239 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1240 1241 /* SGPR2 */ 1242 /* write the register state for the compute dispatch */ 1243 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) { 1244 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1245 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START; 1246 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1]; 1247 } 1248 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1249 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1250 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1251 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1252 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1253 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1254 1255 /* write dispatch packet */ 1256 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1257 ib.ptr[ib.length_dw++] = 8; /* x */ 1258 ib.ptr[ib.length_dw++] = 1; /* y */ 1259 ib.ptr[ib.length_dw++] = 1; /* z */ 1260 ib.ptr[ib.length_dw++] = 1261 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1262 1263 /* write CS partial flush packet */ 1264 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1265 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1266 1267 /* shedule the ib on the ring */ 1268 r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, 1269 NULL, &f); 1270 if (r) { 1271 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 1272 goto fail; 1273 } 1274 1275 /* wait for the GPU to finish processing the IB */ 1276 r = fence_wait(f, false); 1277 if (r) { 1278 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 1279 goto fail; 1280 } 1281 1282 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2); 1283 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1); 1284 
WREG32(mmGB_EDC_MODE, tmp); 1285 1286 tmp = RREG32(mmCC_GC_EDC_CONFIG); 1287 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1; 1288 WREG32(mmCC_GC_EDC_CONFIG, tmp); 1289 1290 1291 /* read back registers to clear the counters */ 1292 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) 1293 RREG32(sec_ded_counter_registers[i]); 1294 1295 fail: 1296 fence_put(f); 1297 amdgpu_ib_free(adev, &ib); 1298 1299 return r; 1300 } 1301 1302 static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) 1303 { 1304 u32 gb_addr_config; 1305 u32 mc_shared_chmap, mc_arb_ramcfg; 1306 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; 1307 u32 tmp; 1308 1309 switch (adev->asic_type) { 1310 case CHIP_TOPAZ: 1311 adev->gfx.config.max_shader_engines = 1; 1312 adev->gfx.config.max_tile_pipes = 2; 1313 adev->gfx.config.max_cu_per_sh = 6; 1314 adev->gfx.config.max_sh_per_se = 1; 1315 adev->gfx.config.max_backends_per_se = 2; 1316 adev->gfx.config.max_texture_channel_caches = 2; 1317 adev->gfx.config.max_gprs = 256; 1318 adev->gfx.config.max_gs_threads = 32; 1319 adev->gfx.config.max_hw_contexts = 8; 1320 1321 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1322 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1323 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1324 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1325 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN; 1326 break; 1327 case CHIP_FIJI: 1328 adev->gfx.config.max_shader_engines = 4; 1329 adev->gfx.config.max_tile_pipes = 16; 1330 adev->gfx.config.max_cu_per_sh = 16; 1331 adev->gfx.config.max_sh_per_se = 1; 1332 adev->gfx.config.max_backends_per_se = 4; 1333 adev->gfx.config.max_texture_channel_caches = 16; 1334 adev->gfx.config.max_gprs = 256; 1335 adev->gfx.config.max_gs_threads = 32; 1336 adev->gfx.config.max_hw_contexts = 8; 1337 1338 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1339 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1340 
adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1341 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1342 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1343 break; 1344 case CHIP_TONGA: 1345 adev->gfx.config.max_shader_engines = 4; 1346 adev->gfx.config.max_tile_pipes = 8; 1347 adev->gfx.config.max_cu_per_sh = 8; 1348 adev->gfx.config.max_sh_per_se = 1; 1349 adev->gfx.config.max_backends_per_se = 2; 1350 adev->gfx.config.max_texture_channel_caches = 8; 1351 adev->gfx.config.max_gprs = 256; 1352 adev->gfx.config.max_gs_threads = 32; 1353 adev->gfx.config.max_hw_contexts = 8; 1354 1355 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1356 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1357 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1358 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1359 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1360 break; 1361 case CHIP_CARRIZO: 1362 adev->gfx.config.max_shader_engines = 1; 1363 adev->gfx.config.max_tile_pipes = 2; 1364 adev->gfx.config.max_sh_per_se = 1; 1365 adev->gfx.config.max_backends_per_se = 2; 1366 1367 switch (adev->pdev->revision) { 1368 case 0xc4: 1369 case 0x84: 1370 case 0xc8: 1371 case 0xcc: 1372 case 0xe1: 1373 case 0xe3: 1374 /* B10 */ 1375 adev->gfx.config.max_cu_per_sh = 8; 1376 break; 1377 case 0xc5: 1378 case 0x81: 1379 case 0x85: 1380 case 0xc9: 1381 case 0xcd: 1382 case 0xe2: 1383 case 0xe4: 1384 /* B8 */ 1385 adev->gfx.config.max_cu_per_sh = 6; 1386 break; 1387 case 0xc6: 1388 case 0xca: 1389 case 0xce: 1390 case 0x88: 1391 /* B6 */ 1392 adev->gfx.config.max_cu_per_sh = 6; 1393 break; 1394 case 0xc7: 1395 case 0x87: 1396 case 0xcb: 1397 case 0xe5: 1398 case 0x89: 1399 default: 1400 /* B4 */ 1401 adev->gfx.config.max_cu_per_sh = 4; 1402 break; 1403 } 1404 1405 adev->gfx.config.max_texture_channel_caches = 2; 1406 adev->gfx.config.max_gprs = 256; 1407 adev->gfx.config.max_gs_threads = 32; 1408 adev->gfx.config.max_hw_contexts = 8; 1409 1410 adev->gfx.config.sc_prim_fifo_size_frontend = 
0x20; 1411 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1412 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1413 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1414 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1415 break; 1416 case CHIP_STONEY: 1417 adev->gfx.config.max_shader_engines = 1; 1418 adev->gfx.config.max_tile_pipes = 2; 1419 adev->gfx.config.max_sh_per_se = 1; 1420 adev->gfx.config.max_backends_per_se = 1; 1421 1422 switch (adev->pdev->revision) { 1423 case 0xc0: 1424 case 0xc1: 1425 case 0xc2: 1426 case 0xc4: 1427 case 0xc8: 1428 case 0xc9: 1429 adev->gfx.config.max_cu_per_sh = 3; 1430 break; 1431 case 0xd0: 1432 case 0xd1: 1433 case 0xd2: 1434 default: 1435 adev->gfx.config.max_cu_per_sh = 2; 1436 break; 1437 } 1438 1439 adev->gfx.config.max_texture_channel_caches = 2; 1440 adev->gfx.config.max_gprs = 256; 1441 adev->gfx.config.max_gs_threads = 16; 1442 adev->gfx.config.max_hw_contexts = 8; 1443 1444 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1445 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1446 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1447 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1448 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1449 break; 1450 default: 1451 adev->gfx.config.max_shader_engines = 2; 1452 adev->gfx.config.max_tile_pipes = 4; 1453 adev->gfx.config.max_cu_per_sh = 2; 1454 adev->gfx.config.max_sh_per_se = 1; 1455 adev->gfx.config.max_backends_per_se = 2; 1456 adev->gfx.config.max_texture_channel_caches = 4; 1457 adev->gfx.config.max_gprs = 256; 1458 adev->gfx.config.max_gs_threads = 32; 1459 adev->gfx.config.max_hw_contexts = 8; 1460 1461 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1462 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1463 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1464 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1465 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1466 break; 1467 } 1468 1469 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); 1470 
adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); 1471 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; 1472 1473 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; 1474 adev->gfx.config.mem_max_burst_length_bytes = 256; 1475 if (adev->flags & AMD_IS_APU) { 1476 /* Get memory bank mapping mode. */ 1477 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING); 1478 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1479 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1480 1481 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING); 1482 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1483 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1484 1485 /* Validate settings in case only one DIMM installed. */ 1486 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12)) 1487 dimm00_addr_map = 0; 1488 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12)) 1489 dimm01_addr_map = 0; 1490 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12)) 1491 dimm10_addr_map = 0; 1492 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12)) 1493 dimm11_addr_map = 0; 1494 1495 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */ 1496 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. 
*/ 1497 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) 1498 adev->gfx.config.mem_row_size_in_kb = 2; 1499 else 1500 adev->gfx.config.mem_row_size_in_kb = 1; 1501 } else { 1502 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS); 1503 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 1504 if (adev->gfx.config.mem_row_size_in_kb > 4) 1505 adev->gfx.config.mem_row_size_in_kb = 4; 1506 } 1507 1508 adev->gfx.config.shader_engine_tile_size = 32; 1509 adev->gfx.config.num_gpus = 1; 1510 adev->gfx.config.multi_gpu_tile_size = 64; 1511 1512 /* fix up row size */ 1513 switch (adev->gfx.config.mem_row_size_in_kb) { 1514 case 1: 1515 default: 1516 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0); 1517 break; 1518 case 2: 1519 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1); 1520 break; 1521 case 4: 1522 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2); 1523 break; 1524 } 1525 adev->gfx.config.gb_addr_config = gb_addr_config; 1526 } 1527 1528 static int gfx_v8_0_sw_init(void *handle) 1529 { 1530 int i, r; 1531 struct amdgpu_ring *ring; 1532 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1533 1534 /* EOP Event */ 1535 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq); 1536 if (r) 1537 return r; 1538 1539 /* Privileged reg */ 1540 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq); 1541 if (r) 1542 return r; 1543 1544 /* Privileged inst */ 1545 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq); 1546 if (r) 1547 return r; 1548 1549 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1550 1551 gfx_v8_0_scratch_init(adev); 1552 1553 r = gfx_v8_0_init_microcode(adev); 1554 if (r) { 1555 DRM_ERROR("Failed to load gfx firmware!\n"); 1556 return r; 1557 } 1558 1559 r = gfx_v8_0_mec_init(adev); 1560 if (r) { 1561 DRM_ERROR("Failed to init MEC BOs!\n"); 1562 return r; 1563 } 1564 1565 
/* set up the gfx ring */ 1566 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 1567 ring = &adev->gfx.gfx_ring[i]; 1568 ring->ring_obj = NULL; 1569 sprintf(ring->name, "gfx"); 1570 /* no gfx doorbells on iceland */ 1571 if (adev->asic_type != CHIP_TOPAZ) { 1572 ring->use_doorbell = true; 1573 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; 1574 } 1575 1576 r = amdgpu_ring_init(adev, ring, 1024 * 1024, 1577 PACKET3(PACKET3_NOP, 0x3FFF), 0xf, 1578 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP, 1579 AMDGPU_RING_TYPE_GFX); 1580 if (r) 1581 return r; 1582 } 1583 1584 /* set up the compute queues */ 1585 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 1586 unsigned irq_type; 1587 1588 /* max 32 queues per MEC */ 1589 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { 1590 DRM_ERROR("Too many (%d) compute rings!\n", i); 1591 break; 1592 } 1593 ring = &adev->gfx.compute_ring[i]; 1594 ring->ring_obj = NULL; 1595 ring->use_doorbell = true; 1596 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; 1597 ring->me = 1; /* first MEC */ 1598 ring->pipe = i / 8; 1599 ring->queue = i % 8; 1600 sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue); 1601 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; 1602 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1603 r = amdgpu_ring_init(adev, ring, 1024 * 1024, 1604 PACKET3(PACKET3_NOP, 0x3FFF), 0xf, 1605 &adev->gfx.eop_irq, irq_type, 1606 AMDGPU_RING_TYPE_COMPUTE); 1607 if (r) 1608 return r; 1609 } 1610 1611 /* reserve GDS, GWS and OA resource for gfx */ 1612 r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size, 1613 PAGE_SIZE, true, 1614 AMDGPU_GEM_DOMAIN_GDS, 0, NULL, 1615 NULL, &adev->gds.gds_gfx_bo); 1616 if (r) 1617 return r; 1618 1619 r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size, 1620 PAGE_SIZE, true, 1621 AMDGPU_GEM_DOMAIN_GWS, 0, NULL, 1622 NULL, &adev->gds.gws_gfx_bo); 1623 if (r) 1624 return r; 1625 1626 r = amdgpu_bo_create(adev, 
adev->gds.oa.gfx_partition_size, 1627 PAGE_SIZE, true, 1628 AMDGPU_GEM_DOMAIN_OA, 0, NULL, 1629 NULL, &adev->gds.oa_gfx_bo); 1630 if (r) 1631 return r; 1632 1633 adev->gfx.ce_ram_size = 0x8000; 1634 1635 gfx_v8_0_gpu_early_init(adev); 1636 1637 return 0; 1638 } 1639 1640 static int gfx_v8_0_sw_fini(void *handle) 1641 { 1642 int i; 1643 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1644 1645 amdgpu_bo_unref(&adev->gds.oa_gfx_bo); 1646 amdgpu_bo_unref(&adev->gds.gws_gfx_bo); 1647 amdgpu_bo_unref(&adev->gds.gds_gfx_bo); 1648 1649 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1650 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1651 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1652 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1653 1654 gfx_v8_0_mec_fini(adev); 1655 1656 return 0; 1657 } 1658 1659 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) 1660 { 1661 uint32_t *modearray, *mod2array; 1662 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); 1663 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 1664 u32 reg_offset; 1665 1666 modearray = adev->gfx.config.tile_mode_array; 1667 mod2array = adev->gfx.config.macrotile_mode_array; 1668 1669 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 1670 modearray[reg_offset] = 0; 1671 1672 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 1673 mod2array[reg_offset] = 0; 1674 1675 switch (adev->asic_type) { 1676 case CHIP_TOPAZ: 1677 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1678 PIPE_CONFIG(ADDR_SURF_P2) | 1679 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 1680 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1681 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1682 PIPE_CONFIG(ADDR_SURF_P2) | 1683 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 1684 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1685 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1686 
PIPE_CONFIG(ADDR_SURF_P2) | 1687 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 1688 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1689 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1690 PIPE_CONFIG(ADDR_SURF_P2) | 1691 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 1692 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1693 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1694 PIPE_CONFIG(ADDR_SURF_P2) | 1695 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 1696 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1697 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1698 PIPE_CONFIG(ADDR_SURF_P2) | 1699 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 1700 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1701 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1702 PIPE_CONFIG(ADDR_SURF_P2) | 1703 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 1704 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1705 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 1706 PIPE_CONFIG(ADDR_SURF_P2)); 1707 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1708 PIPE_CONFIG(ADDR_SURF_P2) | 1709 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1710 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1711 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1712 PIPE_CONFIG(ADDR_SURF_P2) | 1713 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1714 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1715 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1716 PIPE_CONFIG(ADDR_SURF_P2) | 1717 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1718 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1719 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1720 PIPE_CONFIG(ADDR_SURF_P2) | 1721 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1722 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1723 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1724 PIPE_CONFIG(ADDR_SURF_P2) | 1725 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1726 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1727 modearray[15] = 
(ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 1728 PIPE_CONFIG(ADDR_SURF_P2) | 1729 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1730 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1731 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1732 PIPE_CONFIG(ADDR_SURF_P2) | 1733 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1734 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1735 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 1736 PIPE_CONFIG(ADDR_SURF_P2) | 1737 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1738 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1739 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 1740 PIPE_CONFIG(ADDR_SURF_P2) | 1741 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1742 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1743 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 1744 PIPE_CONFIG(ADDR_SURF_P2) | 1745 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1746 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1747 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 1748 PIPE_CONFIG(ADDR_SURF_P2) | 1749 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1750 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1751 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 1752 PIPE_CONFIG(ADDR_SURF_P2) | 1753 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1754 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1755 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 1756 PIPE_CONFIG(ADDR_SURF_P2) | 1757 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1758 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1759 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 1760 PIPE_CONFIG(ADDR_SURF_P2) | 1761 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1762 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1763 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 1764 PIPE_CONFIG(ADDR_SURF_P2) | 1765 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1766 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1767 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1768 PIPE_CONFIG(ADDR_SURF_P2) | 
1769 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1770 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1771 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1772 PIPE_CONFIG(ADDR_SURF_P2) | 1773 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1774 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1775 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1776 PIPE_CONFIG(ADDR_SURF_P2) | 1777 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1778 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1779 1780 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 1781 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1782 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1783 NUM_BANKS(ADDR_SURF_8_BANK)); 1784 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 1785 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1786 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1787 NUM_BANKS(ADDR_SURF_8_BANK)); 1788 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 1789 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1790 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1791 NUM_BANKS(ADDR_SURF_8_BANK)); 1792 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1793 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1794 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1795 NUM_BANKS(ADDR_SURF_8_BANK)); 1796 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1797 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 1798 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1799 NUM_BANKS(ADDR_SURF_8_BANK)); 1800 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1801 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1802 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1803 NUM_BANKS(ADDR_SURF_8_BANK)); 1804 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1805 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1806 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1807 NUM_BANKS(ADDR_SURF_8_BANK)); 1808 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 1809 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 1810 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1811 
NUM_BANKS(ADDR_SURF_16_BANK)); 1812 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 1813 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1814 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1815 NUM_BANKS(ADDR_SURF_16_BANK)); 1816 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 1817 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1818 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1819 NUM_BANKS(ADDR_SURF_16_BANK)); 1820 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 1821 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 1822 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1823 NUM_BANKS(ADDR_SURF_16_BANK)); 1824 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1825 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 1826 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1827 NUM_BANKS(ADDR_SURF_16_BANK)); 1828 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1829 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1830 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 1831 NUM_BANKS(ADDR_SURF_16_BANK)); 1832 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1833 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1834 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1835 NUM_BANKS(ADDR_SURF_8_BANK)); 1836 1837 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 1838 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 1839 reg_offset != 23) 1840 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 1841 1842 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 1843 if (reg_offset != 7) 1844 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 1845 1846 break; 1847 case CHIP_FIJI: 1848 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1849 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1850 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 1851 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1852 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1853 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1854 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 1855 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1856 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1857 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1858 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 1859 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1860 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1861 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1862 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 1863 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1864 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1865 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1866 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 1867 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1868 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1869 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1870 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 1871 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1872 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1873 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1874 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 1875 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1876 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1877 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1878 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 1879 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1880 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 1881 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 1882 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1883 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1884 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1885 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1886 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1887 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1888 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1889 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1890 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1891 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1892 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1893 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1894 
modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1895 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1896 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1897 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1898 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1899 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1900 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1901 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1902 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1903 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1904 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1905 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1906 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 1907 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1908 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1909 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1910 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1911 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1912 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1913 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1914 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1915 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1916 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1917 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1918 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 1919 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1920 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1921 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1922 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 1923 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1924 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1925 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1926 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 1927 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1928 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1929 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1930 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 1931 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1932 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1933 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1934 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 1935 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1936 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1937 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1938 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 1939 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1940 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1941 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1942 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 1943 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1944 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1945 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1946 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 1947 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1948 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1949 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1950 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 1951 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1952 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1953 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1954 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1955 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1956 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1957 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1958 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1959 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1960 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1961 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1962 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1963 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1964 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1965 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1966 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1967 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1968 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1969 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1970 1971 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1972 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) 
| 1973 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1974 NUM_BANKS(ADDR_SURF_8_BANK)); 1975 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1976 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1977 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1978 NUM_BANKS(ADDR_SURF_8_BANK)); 1979 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1980 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1981 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1982 NUM_BANKS(ADDR_SURF_8_BANK)); 1983 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1984 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1985 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 1986 NUM_BANKS(ADDR_SURF_8_BANK)); 1987 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1988 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 1989 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1990 NUM_BANKS(ADDR_SURF_8_BANK)); 1991 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1992 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1993 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1994 NUM_BANKS(ADDR_SURF_8_BANK)); 1995 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1996 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1997 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1998 NUM_BANKS(ADDR_SURF_8_BANK)); 1999 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2000 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2001 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2002 NUM_BANKS(ADDR_SURF_8_BANK)); 2003 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2004 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2005 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2006 NUM_BANKS(ADDR_SURF_8_BANK)); 2007 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2008 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2009 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2010 NUM_BANKS(ADDR_SURF_8_BANK)); 2011 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2012 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2013 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2014 NUM_BANKS(ADDR_SURF_8_BANK)); 2015 mod2array[12] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2016 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2017 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2018 NUM_BANKS(ADDR_SURF_8_BANK)); 2019 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2020 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2021 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2022 NUM_BANKS(ADDR_SURF_8_BANK)); 2023 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2024 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2025 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2026 NUM_BANKS(ADDR_SURF_4_BANK)); 2027 2028 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2029 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2030 2031 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2032 if (reg_offset != 7) 2033 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2034 2035 break; 2036 case CHIP_TONGA: 2037 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2038 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2039 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2040 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2041 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2042 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2043 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2044 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2045 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2046 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2047 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2048 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2049 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2050 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2051 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2052 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2053 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2054 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2055 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2056 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2057 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2058 
PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2059 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2060 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2061 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2062 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2063 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2064 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2065 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2066 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2067 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2068 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2069 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2070 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2071 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2072 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2073 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2074 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2075 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2076 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2077 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2079 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2080 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2081 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2083 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2084 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2085 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2087 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2088 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2089 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2090 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2091 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2092 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2093 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2095 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2096 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2097 
MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2099 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2100 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2101 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2102 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2103 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2104 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2105 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2106 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2107 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2108 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2109 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2111 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2112 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2113 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2114 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2115 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2116 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2117 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2118 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2119 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2120 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2121 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2122 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2123 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2124 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2125 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2126 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2127 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2128 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2129 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2130 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2131 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2132 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2133 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2134 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2135 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2136 
PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2137 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2138 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2139 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2140 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2141 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2142 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2143 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2144 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2145 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2146 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2147 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2148 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2149 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2150 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2151 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2152 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2153 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2154 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2155 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2156 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2157 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2158 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2159 2160 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2163 NUM_BANKS(ADDR_SURF_16_BANK)); 2164 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2167 NUM_BANKS(ADDR_SURF_16_BANK)); 2168 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2169 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2170 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2171 NUM_BANKS(ADDR_SURF_16_BANK)); 2172 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2173 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2174 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2175 NUM_BANKS(ADDR_SURF_16_BANK)); 2176 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2177 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2178 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2179 NUM_BANKS(ADDR_SURF_16_BANK)); 2180 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2181 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2182 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2183 NUM_BANKS(ADDR_SURF_16_BANK)); 2184 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2187 NUM_BANKS(ADDR_SURF_16_BANK)); 2188 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2189 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2190 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2191 NUM_BANKS(ADDR_SURF_16_BANK)); 2192 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2193 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2194 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2195 NUM_BANKS(ADDR_SURF_16_BANK)); 2196 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2197 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2198 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2199 NUM_BANKS(ADDR_SURF_16_BANK)); 2200 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2201 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2202 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2203 NUM_BANKS(ADDR_SURF_16_BANK)); 2204 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2205 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2206 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2207 NUM_BANKS(ADDR_SURF_8_BANK)); 2208 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2209 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2210 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2211 NUM_BANKS(ADDR_SURF_4_BANK)); 2212 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2213 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2214 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2215 NUM_BANKS(ADDR_SURF_4_BANK)); 2216 2217 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2218 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2219 2220 for 
(reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2221 if (reg_offset != 7) 2222 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2223 2224 break; 2225 case CHIP_STONEY: 2226 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2227 PIPE_CONFIG(ADDR_SURF_P2) | 2228 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2230 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2231 PIPE_CONFIG(ADDR_SURF_P2) | 2232 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2233 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2234 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2235 PIPE_CONFIG(ADDR_SURF_P2) | 2236 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2237 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2238 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2239 PIPE_CONFIG(ADDR_SURF_P2) | 2240 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2241 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2242 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2243 PIPE_CONFIG(ADDR_SURF_P2) | 2244 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2245 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2246 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2247 PIPE_CONFIG(ADDR_SURF_P2) | 2248 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2249 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2250 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2251 PIPE_CONFIG(ADDR_SURF_P2) | 2252 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2253 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2254 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2255 PIPE_CONFIG(ADDR_SURF_P2)); 2256 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2257 PIPE_CONFIG(ADDR_SURF_P2) | 2258 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2260 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2261 PIPE_CONFIG(ADDR_SURF_P2) | 2262 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2263 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2264 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2265 PIPE_CONFIG(ADDR_SURF_P2) | 2266 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2268 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2269 PIPE_CONFIG(ADDR_SURF_P2) | 2270 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2271 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2272 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2273 PIPE_CONFIG(ADDR_SURF_P2) | 2274 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2276 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2277 PIPE_CONFIG(ADDR_SURF_P2) | 2278 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2279 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2280 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2281 PIPE_CONFIG(ADDR_SURF_P2) | 2282 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2284 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2285 PIPE_CONFIG(ADDR_SURF_P2) | 2286 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2288 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2289 PIPE_CONFIG(ADDR_SURF_P2) | 2290 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2291 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2292 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2293 PIPE_CONFIG(ADDR_SURF_P2) | 2294 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2296 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2297 PIPE_CONFIG(ADDR_SURF_P2) | 2298 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2299 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2300 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2301 PIPE_CONFIG(ADDR_SURF_P2) | 2302 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2303 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2304 modearray[24] = 
(ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2305 PIPE_CONFIG(ADDR_SURF_P2) | 2306 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2308 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2309 PIPE_CONFIG(ADDR_SURF_P2) | 2310 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2311 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2312 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2313 PIPE_CONFIG(ADDR_SURF_P2) | 2314 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2315 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2316 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2317 PIPE_CONFIG(ADDR_SURF_P2) | 2318 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2320 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2321 PIPE_CONFIG(ADDR_SURF_P2) | 2322 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2323 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2324 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2325 PIPE_CONFIG(ADDR_SURF_P2) | 2326 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2327 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2328 2329 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2330 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2331 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2332 NUM_BANKS(ADDR_SURF_8_BANK)); 2333 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2334 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2335 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2336 NUM_BANKS(ADDR_SURF_8_BANK)); 2337 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2338 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2339 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2340 NUM_BANKS(ADDR_SURF_8_BANK)); 2341 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2342 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2343 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2344 NUM_BANKS(ADDR_SURF_8_BANK)); 2345 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2346 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2348 NUM_BANKS(ADDR_SURF_8_BANK)); 2349 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2352 NUM_BANKS(ADDR_SURF_8_BANK)); 2353 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2356 NUM_BANKS(ADDR_SURF_8_BANK)); 2357 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2360 NUM_BANKS(ADDR_SURF_16_BANK)); 2361 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2364 NUM_BANKS(ADDR_SURF_16_BANK)); 2365 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2366 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2367 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2368 NUM_BANKS(ADDR_SURF_16_BANK)); 2369 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2370 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2371 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2372 NUM_BANKS(ADDR_SURF_16_BANK)); 2373 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2374 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2375 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2376 NUM_BANKS(ADDR_SURF_16_BANK)); 2377 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2378 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2379 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2380 NUM_BANKS(ADDR_SURF_16_BANK)); 2381 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2382 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2383 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2384 NUM_BANKS(ADDR_SURF_8_BANK)); 2385 2386 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2387 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2388 reg_offset != 23) 
2389 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2390 2391 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2392 if (reg_offset != 7) 2393 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2394 2395 break; 2396 default: 2397 dev_warn(adev->dev, 2398 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 2399 adev->asic_type); 2400 2401 case CHIP_CARRIZO: 2402 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2403 PIPE_CONFIG(ADDR_SURF_P2) | 2404 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2405 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2406 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2407 PIPE_CONFIG(ADDR_SURF_P2) | 2408 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2409 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2410 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2411 PIPE_CONFIG(ADDR_SURF_P2) | 2412 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2413 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2414 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2415 PIPE_CONFIG(ADDR_SURF_P2) | 2416 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2417 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2418 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2419 PIPE_CONFIG(ADDR_SURF_P2) | 2420 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2421 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2422 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2423 PIPE_CONFIG(ADDR_SURF_P2) | 2424 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2425 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2426 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2427 PIPE_CONFIG(ADDR_SURF_P2) | 2428 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2429 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2430 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2431 PIPE_CONFIG(ADDR_SURF_P2)); 2432 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2433 PIPE_CONFIG(ADDR_SURF_P2) | 2434 
MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2435 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2436 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2437 PIPE_CONFIG(ADDR_SURF_P2) | 2438 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2439 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2440 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2441 PIPE_CONFIG(ADDR_SURF_P2) | 2442 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2444 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2445 PIPE_CONFIG(ADDR_SURF_P2) | 2446 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2447 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2448 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2449 PIPE_CONFIG(ADDR_SURF_P2) | 2450 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2451 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2452 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2453 PIPE_CONFIG(ADDR_SURF_P2) | 2454 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2455 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2456 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2457 PIPE_CONFIG(ADDR_SURF_P2) | 2458 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2459 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2460 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2461 PIPE_CONFIG(ADDR_SURF_P2) | 2462 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2463 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2464 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2465 PIPE_CONFIG(ADDR_SURF_P2) | 2466 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2468 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2469 PIPE_CONFIG(ADDR_SURF_P2) | 2470 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2472 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2473 PIPE_CONFIG(ADDR_SURF_P2) | 2474 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2475 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2476 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2477 PIPE_CONFIG(ADDR_SURF_P2) | 2478 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2480 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2481 PIPE_CONFIG(ADDR_SURF_P2) | 2482 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2484 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2485 PIPE_CONFIG(ADDR_SURF_P2) | 2486 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2488 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2489 PIPE_CONFIG(ADDR_SURF_P2) | 2490 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2492 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2493 PIPE_CONFIG(ADDR_SURF_P2) | 2494 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2496 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2497 PIPE_CONFIG(ADDR_SURF_P2) | 2498 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2500 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2501 PIPE_CONFIG(ADDR_SURF_P2) | 2502 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2503 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2504 2505 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2506 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2507 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2508 NUM_BANKS(ADDR_SURF_8_BANK)); 2509 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2510 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2511 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2512 NUM_BANKS(ADDR_SURF_8_BANK)); 2513 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2514 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2515 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2516 NUM_BANKS(ADDR_SURF_8_BANK)); 2517 mod2array[3] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2520 NUM_BANKS(ADDR_SURF_8_BANK)); 2521 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2522 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2523 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2524 NUM_BANKS(ADDR_SURF_8_BANK)); 2525 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2526 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2527 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2528 NUM_BANKS(ADDR_SURF_8_BANK)); 2529 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2530 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2531 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2532 NUM_BANKS(ADDR_SURF_8_BANK)); 2533 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2534 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2535 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2536 NUM_BANKS(ADDR_SURF_16_BANK)); 2537 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2538 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2539 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2540 NUM_BANKS(ADDR_SURF_16_BANK)); 2541 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2542 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2543 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2544 NUM_BANKS(ADDR_SURF_16_BANK)); 2545 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2546 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2547 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2548 NUM_BANKS(ADDR_SURF_16_BANK)); 2549 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2550 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2551 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2552 NUM_BANKS(ADDR_SURF_16_BANK)); 2553 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2554 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2555 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2556 NUM_BANKS(ADDR_SURF_16_BANK)); 2557 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2558 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2559 
		MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
		NUM_BANKS(ADDR_SURF_8_BANK));

	/* Program the tile-mode tables.  Entries 7, 12, 17, 23 (and
	 * macrotile entry 7) are deliberately not written -- presumably
	 * reserved entries; TODO(review) confirm against the GFX8 register
	 * spec.
	 */
	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
		    reg_offset != 23)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		if (reg_offset != 7)
			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

	break;
	}
}

/* Build a mask with the low @bit_width bits set, e.g. 4 -> 0xf. */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	return (u32)((1ULL << bit_width) - 1);
}

/*
 * gfx_v8_0_select_se_sh - point GRBM_GFX_INDEX at one SE/SH instance
 *
 * Passing 0xffffffff for @se_num and/or @sh_num selects broadcast writes
 * for that dimension instead of a single index.  Instance broadcast is
 * always enabled.  Callers in this file hold adev->grbm_idx_mutex around
 * select/restore pairs.
 */
void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
{
	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
		/* broadcast to every SE and SH */
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	} else if (se_num == 0xffffffff) {
		/* single SH, all SEs */
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	} else if (sh_num == 0xffffffff) {
		/* all SHs of a single SE */
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
	} else {
		/* one specific SE/SH pair */
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
	}
	WREG32(mmGRBM_GFX_INDEX, data);
}

/*
 * Return the render-backend disable bitmask for the currently selected
 * SE/SH: hard-fused disables (CC_RB_BACKEND_DISABLE) or'ed with
 * user-requested ones (GC_USER_RB_BACKEND_DISABLE), limited to the RBs
 * that belong to one SH.
 */
static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
				    u32 max_rb_num_per_se,
				    u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(mmCC_RB_BACKEND_DISABLE);
	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;

	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}

/*
 * gfx_v8_0_setup_rb - configure the enabled render backends
 *
 * Gathers the per-SE/SH RB disable bits into a single bitmap, derives
 * the enabled-RB mask (stored in adev->gfx.config.backend_enable_mask),
 * then programs PA_SC_RASTER_CONFIG for each shader engine.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
			      u32 se_num, u32 sh_per_se,
			      u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* collect the disable bits of every SE/SH into one bitmap */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			data = gfx_v8_0_get_rb_disabled(adev,
							max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) *
						 RB_BITMAP_WIDTH_PER_SH);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* invert: a set bit in enabled_rbs means the RB is usable */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	adev->gfx.config.backend_enable_mask = enabled_rbs;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
		data = RREG32(mmPA_SC_RASTER_CONFIG);
		for (j = 0; j < sh_per_se; j++) {
			/* each SH consumes two bits of enabled_rbs */
			switch (enabled_rbs & 3) {
			case 0:
				if (j == 0)
					data |= (RASTER_CONFIG_RB_MAP_3 <<
						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
				else
					data |= (RASTER_CONFIG_RB_MAP_0 <<
						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 <<
					 (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 <<
					 (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 <<
					 (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(mmPA_SC_RASTER_CONFIG, data);
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

/**
 * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* program the compute VMIDs (8..15) via SRBM banking */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

/*
 * gfx_v8_0_gpu_init - one-time golden setup of the gfx block
 *
 * Programs the address-config registers (shared with SDMA/UVD/DCE),
 * initializes the tiling tables and render backends, sets up per-VMID
 * SH_MEM registers, and programs the PA_SC FIFO sizes with broadcast
 * writes to all shaders.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	tmp = RREG32(mmGRBM_CNTL);
	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
	WREG32(mmGRBM_CNTL, tmp);

	/* the same gb_addr_config value feeds every client block */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
	WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
	       adev->gfx.config.gb_addr_config & 0x70);
	WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
	       adev->gfx.config.gb_addr_config & 0x70);
	WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);

	gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
			  adev->gfx.config.max_sh_per_se,
			  adev->gfx.config.max_backends_per_se);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel) uses uncached default mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
		PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
		PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
		PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
		PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}

/*
 * Poll each SE/SH, then the non-CU masters, until the RLC serdes
 * report idle or adev->usec_timeout microseconds elapse (best effort;
 * a timeout is not reported to the caller).
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
	       RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
	       RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
	       RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

/* Gate the four GUI-idle related interrupt enables in CP_INT_CNTL_RING0. */
static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ?
			    1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}

/* Halt the RLC, mask GUI-idle interrupts, and wait for serdes idle. */
void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32(mmRLC_CNTL, tmp);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	gfx_v8_0_wait_for_rlc_serdes(adev);
}

/* Pulse the RLC soft-reset bit in GRBM_SOFT_RESET (50us each phase). */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmGRBM_SOFT_RESET);

	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
}

/* Re-enable the RLC; on non-APU parts also unmask GUI-idle interrupts. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
	WREG32(mmRLC_CNTL, tmp);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}

/*
 * Write the RLC microcode into RLC_GPM_UCODE_DATA word by word.
 * Returns 0 on success, -EINVAL if the firmware was never fetched.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	/* final ADDR write carries the fw version -- same pattern as the
	 * CP/MEC loaders below; presumably tags the upload */
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

/*
 * Full RLC bring-up: stop, disable CG/PG, soft-reset, load microcode
 * (directly, or confirm the SMU already loaded it), then start.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}

/* Halt or un-halt the gfx CP micro engines (ME/PFP/CE); on halt the gfx
 * rings are marked not ready. */
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}

/*
 * Upload PFP, CE and ME microcode to the CP.  Returns -EINVAL when any
 * of the three firmware images is missing.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* halt the gfx CP before swapping its microcode */
	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}

/*
 * Size, in dwords, of the PM4 stream emitted by gfx_v8_0_cp_gfx_start():
 * preamble begin + context control + all SECT_CONTEXT extents +
 * raster-config pair + preamble end + clear state.  The counts here must
 * stay in lock-step with the packets written in gfx_v8_0_cp_gfx_start().
 * Returns 0 if a non-context section is found in vi_cs_data.
 */
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

/*
 * Initialize the gfx CP and emit the clear-state / CE partition setup
 * PM4 packets on gfx ring 0.  Returns 0 or the ring-alloc error code.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-asic golden raster config values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring,
				  0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}

/*
 * Program the gfx ring 0 registers (buffer size, rptr/wptr, write-back
 * address, ring base, doorbell), start the clear-state stream and
 * ring-test the ring.  Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	mdelay(1);
	/* drop RB_RPTR_WR_ENA again */
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	/* NOTE(review): the int return of gfx_v8_0_cp_gfx_start() is
	 * ignored here; the subsequent ring test catches failures. */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	return 0;
}

/* Halt or un-halt the compute micro engines (MEC1/MEC2); on halt the
 * compute rings are marked not ready. */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
	}
	udelay(50);
}

/*
 * Upload MEC (and optionally MEC2) microcode.  Returns -EINVAL if the
 * MEC firmware is missing; a missing MEC2 image is skipped silently.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0
		   *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}

/* MQD (memory queue descriptor) layout for VI compute queues; one
 * uint32_t per hardware "ordinal" slot.
 */
struct vi_mqd {
	uint32_t header;			/* ordinal0 */
	uint32_t compute_dispatch_initiator;	/* ordinal1 */
	uint32_t compute_dim_x;			/* ordinal2 */
	uint32_t compute_dim_y;			/* ordinal3 */
	uint32_t compute_dim_z;			/* ordinal4 */
	uint32_t compute_start_x;		/* ordinal5 */
	uint32_t compute_start_y;		/* ordinal6 */
	uint32_t compute_start_z;		/* ordinal7 */
	uint32_t compute_num_thread_x;		/* ordinal8 */
	uint32_t compute_num_thread_y;		/* ordinal9 */
	uint32_t compute_num_thread_z;		/* ordinal10 */
	uint32_t compute_pipelinestat_enable;	/* ordinal11 */
	uint32_t compute_perfcount_enable;	/* ordinal12 */
	uint32_t compute_pgm_lo;		/* ordinal13 */
	uint32_t
compute_pgm_hi; /* ordinal14 */ 3289 uint32_t compute_tba_lo; /* ordinal15 */ 3290 uint32_t compute_tba_hi; /* ordinal16 */ 3291 uint32_t compute_tma_lo; /* ordinal17 */ 3292 uint32_t compute_tma_hi; /* ordinal18 */ 3293 uint32_t compute_pgm_rsrc1; /* ordinal19 */ 3294 uint32_t compute_pgm_rsrc2; /* ordinal20 */ 3295 uint32_t compute_vmid; /* ordinal21 */ 3296 uint32_t compute_resource_limits; /* ordinal22 */ 3297 uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */ 3298 uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */ 3299 uint32_t compute_tmpring_size; /* ordinal25 */ 3300 uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */ 3301 uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */ 3302 uint32_t compute_restart_x; /* ordinal28 */ 3303 uint32_t compute_restart_y; /* ordinal29 */ 3304 uint32_t compute_restart_z; /* ordinal30 */ 3305 uint32_t compute_thread_trace_enable; /* ordinal31 */ 3306 uint32_t compute_misc_reserved; /* ordinal32 */ 3307 uint32_t compute_dispatch_id; /* ordinal33 */ 3308 uint32_t compute_threadgroup_id; /* ordinal34 */ 3309 uint32_t compute_relaunch; /* ordinal35 */ 3310 uint32_t compute_wave_restore_addr_lo; /* ordinal36 */ 3311 uint32_t compute_wave_restore_addr_hi; /* ordinal37 */ 3312 uint32_t compute_wave_restore_control; /* ordinal38 */ 3313 uint32_t reserved9; /* ordinal39 */ 3314 uint32_t reserved10; /* ordinal40 */ 3315 uint32_t reserved11; /* ordinal41 */ 3316 uint32_t reserved12; /* ordinal42 */ 3317 uint32_t reserved13; /* ordinal43 */ 3318 uint32_t reserved14; /* ordinal44 */ 3319 uint32_t reserved15; /* ordinal45 */ 3320 uint32_t reserved16; /* ordinal46 */ 3321 uint32_t reserved17; /* ordinal47 */ 3322 uint32_t reserved18; /* ordinal48 */ 3323 uint32_t reserved19; /* ordinal49 */ 3324 uint32_t reserved20; /* ordinal50 */ 3325 uint32_t reserved21; /* ordinal51 */ 3326 uint32_t reserved22; /* ordinal52 */ 3327 uint32_t reserved23; /* ordinal53 */ 3328 uint32_t reserved24; /* ordinal54 */ 3329 uint32_t 
reserved25; /* ordinal55 */ 3330 uint32_t reserved26; /* ordinal56 */ 3331 uint32_t reserved27; /* ordinal57 */ 3332 uint32_t reserved28; /* ordinal58 */ 3333 uint32_t reserved29; /* ordinal59 */ 3334 uint32_t reserved30; /* ordinal60 */ 3335 uint32_t reserved31; /* ordinal61 */ 3336 uint32_t reserved32; /* ordinal62 */ 3337 uint32_t reserved33; /* ordinal63 */ 3338 uint32_t reserved34; /* ordinal64 */ 3339 uint32_t compute_user_data_0; /* ordinal65 */ 3340 uint32_t compute_user_data_1; /* ordinal66 */ 3341 uint32_t compute_user_data_2; /* ordinal67 */ 3342 uint32_t compute_user_data_3; /* ordinal68 */ 3343 uint32_t compute_user_data_4; /* ordinal69 */ 3344 uint32_t compute_user_data_5; /* ordinal70 */ 3345 uint32_t compute_user_data_6; /* ordinal71 */ 3346 uint32_t compute_user_data_7; /* ordinal72 */ 3347 uint32_t compute_user_data_8; /* ordinal73 */ 3348 uint32_t compute_user_data_9; /* ordinal74 */ 3349 uint32_t compute_user_data_10; /* ordinal75 */ 3350 uint32_t compute_user_data_11; /* ordinal76 */ 3351 uint32_t compute_user_data_12; /* ordinal77 */ 3352 uint32_t compute_user_data_13; /* ordinal78 */ 3353 uint32_t compute_user_data_14; /* ordinal79 */ 3354 uint32_t compute_user_data_15; /* ordinal80 */ 3355 uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */ 3356 uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */ 3357 uint32_t reserved35; /* ordinal83 */ 3358 uint32_t reserved36; /* ordinal84 */ 3359 uint32_t reserved37; /* ordinal85 */ 3360 uint32_t cp_mqd_query_time_lo; /* ordinal86 */ 3361 uint32_t cp_mqd_query_time_hi; /* ordinal87 */ 3362 uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */ 3363 uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */ 3364 uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */ 3365 uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */ 3366 uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */ 3367 uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */ 3368 uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */ 3369 
uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */ 3370 uint32_t reserved38; /* ordinal96 */ 3371 uint32_t reserved39; /* ordinal97 */ 3372 uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */ 3373 uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */ 3374 uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */ 3375 uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */ 3376 uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */ 3377 uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */ 3378 uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */ 3379 uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */ 3380 uint32_t reserved40; /* ordinal106 */ 3381 uint32_t reserved41; /* ordinal107 */ 3382 uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */ 3383 uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */ 3384 uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */ 3385 uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */ 3386 uint32_t reserved42; /* ordinal112 */ 3387 uint32_t reserved43; /* ordinal113 */ 3388 uint32_t cp_pq_exe_status_lo; /* ordinal114 */ 3389 uint32_t cp_pq_exe_status_hi; /* ordinal115 */ 3390 uint32_t cp_packet_id_lo; /* ordinal116 */ 3391 uint32_t cp_packet_id_hi; /* ordinal117 */ 3392 uint32_t cp_packet_exe_status_lo; /* ordinal118 */ 3393 uint32_t cp_packet_exe_status_hi; /* ordinal119 */ 3394 uint32_t gds_save_base_addr_lo; /* ordinal120 */ 3395 uint32_t gds_save_base_addr_hi; /* ordinal121 */ 3396 uint32_t gds_save_mask_lo; /* ordinal122 */ 3397 uint32_t gds_save_mask_hi; /* ordinal123 */ 3398 uint32_t ctx_save_base_addr_lo; /* ordinal124 */ 3399 uint32_t ctx_save_base_addr_hi; /* ordinal125 */ 3400 uint32_t reserved44; /* ordinal126 */ 3401 uint32_t reserved45; /* ordinal127 */ 3402 uint32_t cp_mqd_base_addr_lo; /* ordinal128 */ 3403 uint32_t cp_mqd_base_addr_hi; /* ordinal129 */ 3404 uint32_t cp_hqd_active; /* ordinal130 */ 3405 uint32_t cp_hqd_vmid; /* ordinal131 */ 3406 uint32_t cp_hqd_persistent_state; /* ordinal132 */ 3407 uint32_t cp_hqd_pipe_priority; /* ordinal133 */ 3408 
uint32_t cp_hqd_queue_priority; /* ordinal134 */ 3409 uint32_t cp_hqd_quantum; /* ordinal135 */ 3410 uint32_t cp_hqd_pq_base_lo; /* ordinal136 */ 3411 uint32_t cp_hqd_pq_base_hi; /* ordinal137 */ 3412 uint32_t cp_hqd_pq_rptr; /* ordinal138 */ 3413 uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */ 3414 uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */ 3415 uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */ 3416 uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */ 3417 uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */ 3418 uint32_t cp_hqd_pq_wptr; /* ordinal144 */ 3419 uint32_t cp_hqd_pq_control; /* ordinal145 */ 3420 uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */ 3421 uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */ 3422 uint32_t cp_hqd_ib_rptr; /* ordinal148 */ 3423 uint32_t cp_hqd_ib_control; /* ordinal149 */ 3424 uint32_t cp_hqd_iq_timer; /* ordinal150 */ 3425 uint32_t cp_hqd_iq_rptr; /* ordinal151 */ 3426 uint32_t cp_hqd_dequeue_request; /* ordinal152 */ 3427 uint32_t cp_hqd_dma_offload; /* ordinal153 */ 3428 uint32_t cp_hqd_sema_cmd; /* ordinal154 */ 3429 uint32_t cp_hqd_msg_type; /* ordinal155 */ 3430 uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */ 3431 uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */ 3432 uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */ 3433 uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */ 3434 uint32_t cp_hqd_hq_status0; /* ordinal160 */ 3435 uint32_t cp_hqd_hq_control0; /* ordinal161 */ 3436 uint32_t cp_mqd_control; /* ordinal162 */ 3437 uint32_t cp_hqd_hq_status1; /* ordinal163 */ 3438 uint32_t cp_hqd_hq_control1; /* ordinal164 */ 3439 uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */ 3440 uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */ 3441 uint32_t cp_hqd_eop_control; /* ordinal167 */ 3442 uint32_t cp_hqd_eop_rptr; /* ordinal168 */ 3443 uint32_t cp_hqd_eop_wptr; /* ordinal169 */ 3444 uint32_t cp_hqd_eop_done_events; /* ordinal170 */ 3445 uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */ 3446 
uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */ 3447 uint32_t cp_hqd_ctx_save_control; /* ordinal173 */ 3448 uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */ 3449 uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */ 3450 uint32_t cp_hqd_wg_state_offset; /* ordinal176 */ 3451 uint32_t cp_hqd_ctx_save_size; /* ordinal177 */ 3452 uint32_t cp_hqd_gds_resource_state; /* ordinal178 */ 3453 uint32_t cp_hqd_error; /* ordinal179 */ 3454 uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */ 3455 uint32_t cp_hqd_eop_dones; /* ordinal181 */ 3456 uint32_t reserved46; /* ordinal182 */ 3457 uint32_t reserved47; /* ordinal183 */ 3458 uint32_t reserved48; /* ordinal184 */ 3459 uint32_t reserved49; /* ordinal185 */ 3460 uint32_t reserved50; /* ordinal186 */ 3461 uint32_t reserved51; /* ordinal187 */ 3462 uint32_t reserved52; /* ordinal188 */ 3463 uint32_t reserved53; /* ordinal189 */ 3464 uint32_t reserved54; /* ordinal190 */ 3465 uint32_t reserved55; /* ordinal191 */ 3466 uint32_t iqtimer_pkt_header; /* ordinal192 */ 3467 uint32_t iqtimer_pkt_dw0; /* ordinal193 */ 3468 uint32_t iqtimer_pkt_dw1; /* ordinal194 */ 3469 uint32_t iqtimer_pkt_dw2; /* ordinal195 */ 3470 uint32_t iqtimer_pkt_dw3; /* ordinal196 */ 3471 uint32_t iqtimer_pkt_dw4; /* ordinal197 */ 3472 uint32_t iqtimer_pkt_dw5; /* ordinal198 */ 3473 uint32_t iqtimer_pkt_dw6; /* ordinal199 */ 3474 uint32_t iqtimer_pkt_dw7; /* ordinal200 */ 3475 uint32_t iqtimer_pkt_dw8; /* ordinal201 */ 3476 uint32_t iqtimer_pkt_dw9; /* ordinal202 */ 3477 uint32_t iqtimer_pkt_dw10; /* ordinal203 */ 3478 uint32_t iqtimer_pkt_dw11; /* ordinal204 */ 3479 uint32_t iqtimer_pkt_dw12; /* ordinal205 */ 3480 uint32_t iqtimer_pkt_dw13; /* ordinal206 */ 3481 uint32_t iqtimer_pkt_dw14; /* ordinal207 */ 3482 uint32_t iqtimer_pkt_dw15; /* ordinal208 */ 3483 uint32_t iqtimer_pkt_dw16; /* ordinal209 */ 3484 uint32_t iqtimer_pkt_dw17; /* ordinal210 */ 3485 uint32_t iqtimer_pkt_dw18; /* ordinal211 */ 3486 uint32_t iqtimer_pkt_dw19; /* ordinal212 */ 3487 
uint32_t iqtimer_pkt_dw20; /* ordinal213 */ 3488 uint32_t iqtimer_pkt_dw21; /* ordinal214 */ 3489 uint32_t iqtimer_pkt_dw22; /* ordinal215 */ 3490 uint32_t iqtimer_pkt_dw23; /* ordinal216 */ 3491 uint32_t iqtimer_pkt_dw24; /* ordinal217 */ 3492 uint32_t iqtimer_pkt_dw25; /* ordinal218 */ 3493 uint32_t iqtimer_pkt_dw26; /* ordinal219 */ 3494 uint32_t iqtimer_pkt_dw27; /* ordinal220 */ 3495 uint32_t iqtimer_pkt_dw28; /* ordinal221 */ 3496 uint32_t iqtimer_pkt_dw29; /* ordinal222 */ 3497 uint32_t iqtimer_pkt_dw30; /* ordinal223 */ 3498 uint32_t iqtimer_pkt_dw31; /* ordinal224 */ 3499 uint32_t reserved56; /* ordinal225 */ 3500 uint32_t reserved57; /* ordinal226 */ 3501 uint32_t reserved58; /* ordinal227 */ 3502 uint32_t set_resources_header; /* ordinal228 */ 3503 uint32_t set_resources_dw1; /* ordinal229 */ 3504 uint32_t set_resources_dw2; /* ordinal230 */ 3505 uint32_t set_resources_dw3; /* ordinal231 */ 3506 uint32_t set_resources_dw4; /* ordinal232 */ 3507 uint32_t set_resources_dw5; /* ordinal233 */ 3508 uint32_t set_resources_dw6; /* ordinal234 */ 3509 uint32_t set_resources_dw7; /* ordinal235 */ 3510 uint32_t reserved59; /* ordinal236 */ 3511 uint32_t reserved60; /* ordinal237 */ 3512 uint32_t reserved61; /* ordinal238 */ 3513 uint32_t reserved62; /* ordinal239 */ 3514 uint32_t reserved63; /* ordinal240 */ 3515 uint32_t reserved64; /* ordinal241 */ 3516 uint32_t reserved65; /* ordinal242 */ 3517 uint32_t reserved66; /* ordinal243 */ 3518 uint32_t reserved67; /* ordinal244 */ 3519 uint32_t reserved68; /* ordinal245 */ 3520 uint32_t reserved69; /* ordinal246 */ 3521 uint32_t reserved70; /* ordinal247 */ 3522 uint32_t reserved71; /* ordinal248 */ 3523 uint32_t reserved72; /* ordinal249 */ 3524 uint32_t reserved73; /* ordinal250 */ 3525 uint32_t reserved74; /* ordinal251 */ 3526 uint32_t reserved75; /* ordinal252 */ 3527 uint32_t reserved76; /* ordinal253 */ 3528 uint32_t reserved77; /* ordinal254 */ 3529 uint32_t reserved78; /* ordinal255 */ 3530 3531 uint32_t 
reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};

/*
 * gfx_v8_0_cp_compute_fini - free the per-ring MQD buffer objects
 *
 * Unpins and drops the reference on each compute ring's MQD BO, if one
 * was allocated.  Called on teardown and on any error during compute
 * queue setup.  A failed reserve is only warned about; teardown proceeds
 * regardless so we never leak the BO reference.
 */
static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj) {
			r = amdgpu_bo_reserve(ring->mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);

			amdgpu_bo_unpin(ring->mqd_obj);
			amdgpu_bo_unreserve(ring->mqd_obj);

			amdgpu_bo_unref(&ring->mqd_obj);
			ring->mqd_obj = NULL;
		}
	}
}

/*
 * gfx_v8_0_cp_compute_resume - bring up the MEC compute queues
 *
 * Programs the per-pipe EOP buffers, then for every compute ring
 * allocates (if needed), pins and maps an MQD buffer object in GTT,
 * fills in the queue descriptor and mirrors it into the CP_HQD_*
 * registers, and finally activates the queue and runs a ring test.
 *
 * All HQD register accesses are indexed through vi_srbm_select(), so
 * srbm_mutex must be (and is) held around each select/program sequence.
 *
 * Returns 0 on success, negative error code on failure; on failure the
 * already-created MQD BOs are torn down via gfx_v8_0_cp_compute_fini().
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 live on MEC1, pipes 4-7 on MEC2 */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* EOP base is a 256-byte aligned GPU address, stored >> 8 */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues - one MQD per compute ring */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs on all shader engines for this queue */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* mirror the EOP base programmed in the pipe-init loop above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* busy-wait (up to usec_timeout us) for dequeue to finish */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* sanity-check each queue with a ring test; failures just mark
	 * the ring not ready rather than failing the whole resume */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}

/*
 * gfx_v8_0_cp_resume - load CP microcode (if needed) and start all rings
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if
(!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU-driven loading: just confirm each CP fw finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			/* Topaz loads MEC directly even with SMU fw loading */
			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
								AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_compute_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}

/* Enable/disable both the GFX and compute command processors together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}

/*
 * gfx_v8_0_hw_init - amd_ip_funcs .hw_init hook for the GFX8 block
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Applies golden register settings, initializes the GPU core, then
 * brings up the RLC and CP.  Returns 0 on success or a negative error
 * code from the first failing step.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);

	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);
	if (r)
		return r;

	/* r is 0 here */
	return r;
}

/*
 * gfx_v8_0_hw_fini - amd_ip_funcs .hw_fini hook for the GFX8 block
 *
 * Drops the privileged-op IRQ references, stops the CP and RLC and
 * frees the compute MQDs.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev,
&adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	return 0;
}

/* .suspend hook: identical to hw_fini for this IP block. */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_fini(adev);
}

/* .resume hook: identical to hw_init for this IP block. */
static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_init(adev);
}

/* Idle when GRBM_STATUS.GUI_ACTIVE is clear. */
static bool gfx_v8_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

/*
 * Poll GRBM_STATUS.GUI_ACTIVE until it clears or usec_timeout
 * microseconds elapse.  Returns 0 when idle, -ETIMEDOUT otherwise.
 */
static int gfx_v8_0_wait_for_idle(void *handle)
{
	unsigned i;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* read GRBM_STATUS and isolate the GUI_ACTIVE bit */
		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;

		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

/*
 * gfx_v8_0_print_status - dump GFX8 register state to the kernel log
 *
 * Debug aid used around soft reset: prints the GRBM/CP status, tiling
 * tables, per-SE raster config, CP ring state, RLC state and the
 * per-VMID SH_MEM_* aperture registers.
 */
static void gfx_v8_0_print_status(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	dev_info(adev->dev, "GFX 8.x registers\n");
	dev_info(adev->dev, " GRBM_STATUS=0x%08X\n",
		RREG32(mmGRBM_STATUS));
	dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n",
		RREG32(mmGRBM_STATUS2));
	dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n",
		RREG32(mmGRBM_STATUS_SE0));
	dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n",
		RREG32(mmGRBM_STATUS_SE1));
	dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n",
		RREG32(mmGRBM_STATUS_SE2));
	dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n",
		RREG32(mmGRBM_STATUS_SE3));
	dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
	dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT1));
	dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT2));
	dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT3));
	dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(mmCP_CPF_BUSY_STAT));
	dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPF_STALLED_STAT1));
	dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
	dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
	dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPC_STALLED_STAT1));
	dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));

	for (i = 0; i < 32; i++) {
		dev_info(adev->dev, " GB_TILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
	}
	for (i = 0; i < 16; i++) {
		dev_info(adev->dev, " GB_MACROTILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
	}
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		dev_info(adev->dev, " se: %d\n", i);
		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
		dev_info(adev->dev, " PA_SC_RASTER_CONFIG=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG));
		dev_info(adev->dev, " PA_SC_RASTER_CONFIG_1=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG_1));
	}
	/* restore broadcast SE/SH selection */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	dev_info(adev->dev, " GB_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmGB_ADDR_CONFIG));
	dev_info(adev->dev, " HDP_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmHDP_ADDR_CONFIG));
	dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n",
		 RREG32(mmDMIF_ADDR_CALC));
	dev_info(adev->dev, " SDMA0_TILING_CONFIG=0x%08X\n",
		 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
	dev_info(adev->dev, " SDMA1_TILING_CONFIG=0x%08X\n",
		 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
	dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_ADDR_CONFIG));
	dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
	dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));

	dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n",
		 RREG32(mmCP_MEQ_THRESHOLDS));
	dev_info(adev->dev, " SX_DEBUG_1=0x%08X\n",
		 RREG32(mmSX_DEBUG_1));
	dev_info(adev->dev, " TA_CNTL_AUX=0x%08X\n",
		 RREG32(mmTA_CNTL_AUX));
	dev_info(adev->dev, " SPI_CONFIG_CNTL=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL));
	dev_info(adev->dev, " SQ_CONFIG=0x%08X\n",
		 RREG32(mmSQ_CONFIG));
	dev_info(adev->dev, " DB_DEBUG=0x%08X\n",
		 RREG32(mmDB_DEBUG));
	dev_info(adev->dev, " DB_DEBUG2=0x%08X\n",
		 RREG32(mmDB_DEBUG2));
	dev_info(adev->dev, " DB_DEBUG3=0x%08X\n",
		 RREG32(mmDB_DEBUG3));
	dev_info(adev->dev, " CB_HW_CONTROL=0x%08X\n",
		 RREG32(mmCB_HW_CONTROL));
	dev_info(adev->dev, " SPI_CONFIG_CNTL_1=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL_1));
	dev_info(adev->dev, " PA_SC_FIFO_SIZE=0x%08X\n",
		 RREG32(mmPA_SC_FIFO_SIZE));
	dev_info(adev->dev, " VGT_NUM_INSTANCES=0x%08X\n",
		 RREG32(mmVGT_NUM_INSTANCES));
	dev_info(adev->dev, " CP_PERFMON_CNTL=0x%08X\n",
		 RREG32(mmCP_PERFMON_CNTL));
	dev_info(adev->dev, " PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
		 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
	dev_info(adev->dev, " VGT_CACHE_INVALIDATION=0x%08X\n",
		 RREG32(mmVGT_CACHE_INVALIDATION));
	dev_info(adev->dev, " VGT_GS_VERTEX_REUSE=0x%08X\n",
		 RREG32(mmVGT_GS_VERTEX_REUSE));
	dev_info(adev->dev, " PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
		 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
	dev_info(adev->dev, " PA_CL_ENHANCE=0x%08X\n",
		 RREG32(mmPA_CL_ENHANCE));
	dev_info(adev->dev, " PA_SC_ENHANCE=0x%08X\n",
		 RREG32(mmPA_SC_ENHANCE));

	dev_info(adev->dev, " CP_ME_CNTL=0x%08X\n",
		 RREG32(mmCP_ME_CNTL));
	dev_info(adev->dev, " CP_MAX_CONTEXT=0x%08X\n",
		 RREG32(mmCP_MAX_CONTEXT));
	dev_info(adev->dev, " CP_ENDIAN_SWAP=0x%08X\n",
		 RREG32(mmCP_ENDIAN_SWAP));
	dev_info(adev->dev, " CP_DEVICE_ID=0x%08X\n",
		 RREG32(mmCP_DEVICE_ID));

	dev_info(adev->dev, " CP_SEM_WAIT_TIMER=0x%08X\n",
		 RREG32(mmCP_SEM_WAIT_TIMER));

	dev_info(adev->dev, " CP_RB_WPTR_DELAY=0x%08X\n",
		 RREG32(mmCP_RB_WPTR_DELAY));
	dev_info(adev->dev, " CP_RB_VMID=0x%08X\n",
		 RREG32(mmCP_RB_VMID));
	dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, " CP_RB0_WPTR=0x%08X\n",
		 RREG32(mmCP_RB0_WPTR));
	dev_info(adev->dev, " CP_RB0_RPTR_ADDR=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR));
	dev_info(adev->dev, " CP_RB0_RPTR_ADDR_HI=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR_HI));
	/* NOTE(review): CP_RB0_CNTL is printed a second time here —
	 * looks like an accidental duplicate; kept to preserve output. */
	dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, " CP_RB0_BASE=0x%08X\n",
		 RREG32(mmCP_RB0_BASE));
	dev_info(adev->dev, " CP_RB0_BASE_HI=0x%08X\n",
		 RREG32(mmCP_RB0_BASE_HI));
	dev_info(adev->dev, " CP_MEC_CNTL=0x%08X\n",
		 RREG32(mmCP_MEC_CNTL));
	dev_info(adev->dev, " CP_CPF_DEBUG=0x%08X\n",
		 RREG32(mmCP_CPF_DEBUG));

	dev_info(adev->dev, " SCRATCH_ADDR=0x%08X\n",
		 RREG32(mmSCRATCH_ADDR));
	dev_info(adev->dev, " SCRATCH_UMSK=0x%08X\n",
		 RREG32(mmSCRATCH_UMSK));

	dev_info(adev->dev, " CP_INT_CNTL_RING0=0x%08X\n",
		 RREG32(mmCP_INT_CNTL_RING0));
	dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, " RLC_CNTL=0x%08X\n",
		 RREG32(mmRLC_CNTL));
	dev_info(adev->dev, " RLC_CGCG_CGLS_CTRL=0x%08X\n",
		 RREG32(mmRLC_CGCG_CGLS_CTRL));
	dev_info(adev->dev, " RLC_LB_CNTR_INIT=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_INIT));
	dev_info(adev->dev, " RLC_LB_CNTR_MAX=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_MAX));
	dev_info(adev->dev, " RLC_LB_INIT_CU_MASK=0x%08X\n",
		 RREG32(mmRLC_LB_INIT_CU_MASK));
	dev_info(adev->dev, " RLC_LB_PARAMS=0x%08X\n",
		 RREG32(mmRLC_LB_PARAMS));
	dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, " RLC_MC_CNTL=0x%08X\n",
		 RREG32(mmRLC_MC_CNTL));
	dev_info(adev->dev, " RLC_UCODE_CNTL=0x%08X\n",
		 RREG32(mmRLC_UCODE_CNTL));

	/* dump the per-VMID shader memory apertures (SRBM-indexed) */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		dev_info(adev->dev, " VM %d:\n", i);
		dev_info(adev->dev, " SH_MEM_CONFIG=0x%08X\n",
			 RREG32(mmSH_MEM_CONFIG));
		dev_info(adev->dev, " SH_MEM_APE1_BASE=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_BASE));
		dev_info(adev->dev, " SH_MEM_APE1_LIMIT=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_LIMIT));
		dev_info(adev->dev, " SH_MEM_BASES=0x%08X\n",
			 RREG32(mmSH_MEM_BASES));
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

/*
 * gfx_v8_0_soft_reset - amd_ip_funcs .soft_reset hook for GFX8
 *
 * Inspects GRBM/SRBM status, builds soft-reset masks for CP/GFX/RLC/
 * GRBM as needed, then (if anything is busy) stops the RLC and CP,
 * pulses the reset bits with 50us settle delays, and dumps register
 * state before and after for debugging.  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		gfx_v8_0_print_status((void *)adev);
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);

		/* stall/clear the GFX pipe in the memory controller
		 * before asserting reset */
		if (grbm_soft_reset || srbm_soft_reset) {
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 1);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 1);
			WREG32(mmGMCON_DEBUG, tmp);

			udelay(50);
		}

		if (grbm_soft_reset) {
			/* assert, read back to post, wait, then deassert */
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		if (srbm_soft_reset) {
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}

		if
(grbm_soft_reset || srbm_soft_reset) {
			/* release the GMCON stall/clear asserted above */
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 0);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 0);
			WREG32(mmGMCON_DEBUG, tmp);
		}

		/* Wait a little for things to settle down */
		udelay(50);
		gfx_v8_0_print_status((void *)adev);
	}
	return 0;
}

/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.  The RLC latches the counter
 * into the LSB/MSB registers on the capture write; gpu_clock_mutex
 * keeps the capture/read sequence atomic.
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}

/*
 * gfx_v8_0_ring_emit_gds_switch - emit GDS/GWS/OA setup for a VMID
 *
 * Converts the byte-based base/size arguments to the hardware units
 * (via the AMDGPU_GDS/GWS/OA shifts) and emits four WRITE_DATA packets
 * programming the per-VMID GDS memory, GWS and OA allocation registers.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: build a contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}

/* .early_init hook: set ring counts and install function tables. */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);

	return 0;
}

/*
 * .late_init hook: take the privileged-op IRQ references (released in
 * hw_fini) and run the EDC GPR workarounds, which need a working IB pool.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	return 0;
}

/* Powergating not implemented for GFX8; intentional no-op. */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}

/*
 * fiji_send_serdes_cmd - issue a BPM serdes command to all CUs
 *
 * Broadcasts to every SE/SH, then writes @cmd/@reg_addr into
 * RLC_SERDES_WR_CTRL with all command/select bits cleared first.
 */
static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
		uint32_t reg_addr, uint32_t cmd)
{
	uint32_t
data; 4355 4356 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); 4357 4358 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 4359 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 4360 4361 data = RREG32(mmRLC_SERDES_WR_CTRL); 4362 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 4363 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 4364 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 4365 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 4366 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 4367 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 4368 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 4369 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 4370 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 4371 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 4372 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 4373 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 4374 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 4375 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 4376 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 4377 4378 WREG32(mmRLC_SERDES_WR_CTRL, data); 4379 } 4380 4381 static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4382 bool enable) 4383 { 4384 uint32_t temp, data; 4385 4386 /* It is disabled by HW by default */ 4387 if (enable) { 4388 /* 1 - RLC memory Light sleep */ 4389 temp = data = RREG32(mmRLC_MEM_SLP_CNTL); 4390 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4391 if (temp != data) 4392 WREG32(mmRLC_MEM_SLP_CNTL, data); 4393 4394 /* 2 - CP memory Light sleep */ 4395 temp = data = RREG32(mmCP_MEM_SLP_CNTL); 4396 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4397 if (temp != data) 4398 WREG32(mmCP_MEM_SLP_CNTL, data); 4399 4400 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 4401 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 4402 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 4403 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 4404 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 4405 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 4406 4407 if (temp != data) 4408 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 4409 4410 /* 4 - wait for RLC_SERDES_CU_MASTER & 
RLC_SERDES_NONCU_MASTER idle */ 4411 gfx_v8_0_wait_for_rlc_serdes(adev); 4412 4413 /* 5 - clear mgcg override */ 4414 fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 4415 4416 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 4417 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 4418 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 4419 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 4420 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 4421 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 4422 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 4423 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 4424 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 4425 if (temp != data) 4426 WREG32(mmCGTS_SM_CTRL_REG, data); 4427 udelay(50); 4428 4429 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 4430 gfx_v8_0_wait_for_rlc_serdes(adev); 4431 } else { 4432 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 4433 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 4434 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 4435 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 4436 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 4437 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 4438 if (temp != data) 4439 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 4440 4441 /* 2 - disable MGLS in RLC */ 4442 data = RREG32(mmRLC_MEM_SLP_CNTL); 4443 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4444 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4445 WREG32(mmRLC_MEM_SLP_CNTL, data); 4446 } 4447 4448 /* 3 - disable MGLS in CP */ 4449 data = RREG32(mmCP_MEM_SLP_CNTL); 4450 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4451 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4452 WREG32(mmCP_MEM_SLP_CNTL, data); 4453 } 4454 4455 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 4456 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 4457 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 4458 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 4459 if (temp != data) 4460 WREG32(mmCGTS_SM_CTRL_REG, data); 4461 4462 /* 5 - wait for 
RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 4463 gfx_v8_0_wait_for_rlc_serdes(adev); 4464 4465 /* 6 - set mgcg override */ 4466 fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 4467 4468 udelay(50); 4469 4470 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 4471 gfx_v8_0_wait_for_rlc_serdes(adev); 4472 } 4473 } 4474 4475 static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4476 bool enable) 4477 { 4478 uint32_t temp, temp1, data, data1; 4479 4480 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 4481 4482 if (enable) { 4483 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/ 4484 * Cmp_busy/GFX_Idle interrupts 4485 */ 4486 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 4487 4488 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 4489 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 4490 if (temp1 != data1) 4491 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 4492 4493 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 4494 gfx_v8_0_wait_for_rlc_serdes(adev); 4495 4496 /* 3 - clear cgcg override */ 4497 fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 4498 4499 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 4500 gfx_v8_0_wait_for_rlc_serdes(adev); 4501 4502 /* 4 - write cmd to set CGLS */ 4503 fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 4504 4505 /* 5 - enable cgcg */ 4506 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4507 4508 /* enable cgls*/ 4509 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4510 4511 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 4512 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 4513 4514 if (temp1 != data1) 4515 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 4516 4517 if (temp != data) 4518 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 4519 } else { 4520 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 4521 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4522 4523 /* TEST CGCG */ 4524 temp1 = data1 = 
RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 4525 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 4526 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 4527 if (temp1 != data1) 4528 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 4529 4530 /* read gfx register to wake up cgcg */ 4531 RREG32(mmCB_CGTT_SCLK_CTRL); 4532 RREG32(mmCB_CGTT_SCLK_CTRL); 4533 RREG32(mmCB_CGTT_SCLK_CTRL); 4534 RREG32(mmCB_CGTT_SCLK_CTRL); 4535 4536 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 4537 gfx_v8_0_wait_for_rlc_serdes(adev); 4538 4539 /* write cmd to Set CGCG Overrride */ 4540 fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 4541 4542 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 4543 gfx_v8_0_wait_for_rlc_serdes(adev); 4544 4545 /* write cmd to Clear CGLS */ 4546 fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 4547 4548 /* disable cgcg, cgls should be disabled too. */ 4549 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 4550 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4551 if (temp != data) 4552 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 4553 } 4554 } 4555 static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev, 4556 bool enable) 4557 { 4558 if (enable) { 4559 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 4560 * === MGCG + MGLS + TS(CG/LS) === 4561 */ 4562 fiji_update_medium_grain_clock_gating(adev, enable); 4563 fiji_update_coarse_grain_clock_gating(adev, enable); 4564 } else { 4565 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 4566 * === CGCG + CGLS === 4567 */ 4568 fiji_update_coarse_grain_clock_gating(adev, enable); 4569 fiji_update_medium_grain_clock_gating(adev, enable); 4570 } 4571 return 0; 4572 } 4573 4574 static int gfx_v8_0_set_clockgating_state(void *handle, 4575 enum amd_clockgating_state state) 4576 { 4577 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4578 4579 switch (adev->asic_type) { 4580 case CHIP_FIJI: 4581 fiji_update_gfx_clock_gating(adev, 4582 state == AMD_CG_STATE_GATE ? 
true : false); 4583 break; 4584 default: 4585 break; 4586 } 4587 return 0; 4588 } 4589 4590 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4591 { 4592 u32 rptr; 4593 4594 rptr = ring->adev->wb.wb[ring->rptr_offs]; 4595 4596 return rptr; 4597 } 4598 4599 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4600 { 4601 struct amdgpu_device *adev = ring->adev; 4602 u32 wptr; 4603 4604 if (ring->use_doorbell) 4605 /* XXX check if swapping is necessary on BE */ 4606 wptr = ring->adev->wb.wb[ring->wptr_offs]; 4607 else 4608 wptr = RREG32(mmCP_RB0_WPTR); 4609 4610 return wptr; 4611 } 4612 4613 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4614 { 4615 struct amdgpu_device *adev = ring->adev; 4616 4617 if (ring->use_doorbell) { 4618 /* XXX check if swapping is necessary on BE */ 4619 adev->wb.wb[ring->wptr_offs] = ring->wptr; 4620 WDOORBELL32(ring->doorbell_index, ring->wptr); 4621 } else { 4622 WREG32(mmCP_RB0_WPTR, ring->wptr); 4623 (void)RREG32(mmCP_RB0_WPTR); 4624 } 4625 } 4626 4627 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4628 { 4629 u32 ref_and_mask, reg_mem_engine; 4630 4631 if (ring->type == AMDGPU_RING_TYPE_COMPUTE) { 4632 switch (ring->me) { 4633 case 1: 4634 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 4635 break; 4636 case 2: 4637 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 4638 break; 4639 default: 4640 return; 4641 } 4642 reg_mem_engine = 0; 4643 } else { 4644 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 4645 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 4646 } 4647 4648 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 4649 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 4650 WAIT_REG_MEM_FUNCTION(3) | /* == */ 4651 reg_mem_engine)); 4652 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 4653 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 4654 amdgpu_ring_write(ring, ref_and_mask); 4655 amdgpu_ring_write(ring, 
ref_and_mask); 4656 amdgpu_ring_write(ring, 0x20); /* poll interval */ 4657 } 4658 4659 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4660 struct amdgpu_ib *ib) 4661 { 4662 bool need_ctx_switch = ring->current_ctx != ib->ctx; 4663 u32 header, control = 0; 4664 u32 next_rptr = ring->wptr + 5; 4665 4666 /* drop the CE preamble IB for the same context */ 4667 if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch) 4668 return; 4669 4670 if (need_ctx_switch) 4671 next_rptr += 2; 4672 4673 next_rptr += 4; 4674 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4675 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 4676 amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); 4677 amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); 4678 amdgpu_ring_write(ring, next_rptr); 4679 4680 /* insert SWITCH_BUFFER packet before first IB in the ring frame */ 4681 if (need_ctx_switch) { 4682 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 4683 amdgpu_ring_write(ring, 0); 4684 } 4685 4686 if (ib->flags & AMDGPU_IB_FLAG_CE) 4687 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 4688 else 4689 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4690 4691 control |= ib->length_dw | 4692 (ib->vm ? 
(ib->vm->ids[ring->idx].id << 24) : 0); 4693 4694 amdgpu_ring_write(ring, header); 4695 amdgpu_ring_write(ring, 4696 #ifdef __BIG_ENDIAN 4697 (2 << 0) | 4698 #endif 4699 (ib->gpu_addr & 0xFFFFFFFC)); 4700 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 4701 amdgpu_ring_write(ring, control); 4702 } 4703 4704 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 4705 struct amdgpu_ib *ib) 4706 { 4707 u32 header, control = 0; 4708 u32 next_rptr = ring->wptr + 5; 4709 4710 control |= INDIRECT_BUFFER_VALID; 4711 4712 next_rptr += 4; 4713 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4714 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 4715 amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); 4716 amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); 4717 amdgpu_ring_write(ring, next_rptr); 4718 4719 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4720 4721 control |= ib->length_dw | 4722 (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0); 4723 4724 amdgpu_ring_write(ring, header); 4725 amdgpu_ring_write(ring, 4726 #ifdef __BIG_ENDIAN 4727 (2 << 0) | 4728 #endif 4729 (ib->gpu_addr & 0xFFFFFFFC)); 4730 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 4731 amdgpu_ring_write(ring, control); 4732 } 4733 4734 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, 4735 u64 seq, unsigned flags) 4736 { 4737 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 4738 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 4739 4740 /* EVENT_WRITE_EOP - flush caches, send int */ 4741 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 4742 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 4743 EOP_TC_ACTION_EN | 4744 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 4745 EVENT_INDEX(5))); 4746 amdgpu_ring_write(ring, addr & 0xfffffffc); 4747 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 4748 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 4749 amdgpu_ring_write(ring, lower_32_bits(seq)); 4750 amdgpu_ring_write(ring, upper_32_bits(seq)); 4751 4752 } 4753 4754 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 4755 unsigned vm_id, uint64_t pd_addr) 4756 { 4757 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); 4758 uint32_t seq = ring->fence_drv.sync_seq; 4759 uint64_t addr = ring->fence_drv.gpu_addr; 4760 4761 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 4762 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ 4763 WAIT_REG_MEM_FUNCTION(3))); /* equal */ 4764 amdgpu_ring_write(ring, addr & 0xfffffffc); 4765 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 4766 amdgpu_ring_write(ring, seq); 4767 amdgpu_ring_write(ring, 0xffffffff); 4768 amdgpu_ring_write(ring, 4); /* poll interval */ 4769 4770 if (usepfp) { 4771 /* synce CE with ME to prevent CE fetch CEIB before context switch done */ 4772 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 4773 amdgpu_ring_write(ring, 0); 4774 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 4775 amdgpu_ring_write(ring, 0); 4776 } 4777 4778 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4779 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 4780 WRITE_DATA_DST_SEL(0)) | 4781 WR_CONFIRM); 4782 if (vm_id < 8) { 4783 amdgpu_ring_write(ring, 4784 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); 4785 } else { 4786 amdgpu_ring_write(ring, 4787 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); 4788 } 4789 amdgpu_ring_write(ring, 0); 4790 amdgpu_ring_write(ring, pd_addr >> 12); 4791 4792 /* bits 0-15 are the VM contexts0-15 */ 4793 /* invalidate the cache */ 4794 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4795 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4796 WRITE_DATA_DST_SEL(0))); 4797 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 4798 amdgpu_ring_write(ring, 0); 4799 amdgpu_ring_write(ring, 1 << vm_id); 4800 4801 /* wait for the invalidate to 
complete */ 4802 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 4803 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ 4804 WAIT_REG_MEM_FUNCTION(0) | /* always */ 4805 WAIT_REG_MEM_ENGINE(0))); /* me */ 4806 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 4807 amdgpu_ring_write(ring, 0); 4808 amdgpu_ring_write(ring, 0); /* ref */ 4809 amdgpu_ring_write(ring, 0); /* mask */ 4810 amdgpu_ring_write(ring, 0x20); /* poll interval */ 4811 4812 /* compute doesn't have PFP */ 4813 if (usepfp) { 4814 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 4815 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 4816 amdgpu_ring_write(ring, 0x0); 4817 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 4818 amdgpu_ring_write(ring, 0); 4819 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 4820 amdgpu_ring_write(ring, 0); 4821 } 4822 } 4823 4824 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 4825 { 4826 return ring->adev->wb.wb[ring->rptr_offs]; 4827 } 4828 4829 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 4830 { 4831 return ring->adev->wb.wb[ring->wptr_offs]; 4832 } 4833 4834 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 4835 { 4836 struct amdgpu_device *adev = ring->adev; 4837 4838 /* XXX check if swapping is necessary on BE */ 4839 adev->wb.wb[ring->wptr_offs] = ring->wptr; 4840 WDOORBELL32(ring->doorbell_index, ring->wptr); 4841 } 4842 4843 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, 4844 u64 addr, u64 seq, 4845 unsigned flags) 4846 { 4847 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 4848 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 4849 4850 /* RELEASE_MEM - flush caches, send int */ 4851 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5)); 4852 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 4853 EOP_TC_ACTION_EN | 4854 EOP_TC_WB_ACTION_EN | 4855 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 4856 
EVENT_INDEX(5))); 4857 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 4858 amdgpu_ring_write(ring, addr & 0xfffffffc); 4859 amdgpu_ring_write(ring, upper_32_bits(addr)); 4860 amdgpu_ring_write(ring, lower_32_bits(seq)); 4861 amdgpu_ring_write(ring, upper_32_bits(seq)); 4862 } 4863 4864 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 4865 enum amdgpu_interrupt_state state) 4866 { 4867 u32 cp_int_cntl; 4868 4869 switch (state) { 4870 case AMDGPU_IRQ_STATE_DISABLE: 4871 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 4872 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 4873 TIME_STAMP_INT_ENABLE, 0); 4874 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 4875 break; 4876 case AMDGPU_IRQ_STATE_ENABLE: 4877 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 4878 cp_int_cntl = 4879 REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 4880 TIME_STAMP_INT_ENABLE, 1); 4881 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 4882 break; 4883 default: 4884 break; 4885 } 4886 } 4887 4888 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 4889 int me, int pipe, 4890 enum amdgpu_interrupt_state state) 4891 { 4892 u32 mec_int_cntl, mec_int_cntl_reg; 4893 4894 /* 4895 * amdgpu controls only pipe 0 of MEC1. That's why this function only 4896 * handles the setting of interrupts for this specific pipe. All other 4897 * pipes' interrupts are set by amdkfd. 
4898 */ 4899 4900 if (me == 1) { 4901 switch (pipe) { 4902 case 0: 4903 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; 4904 break; 4905 default: 4906 DRM_DEBUG("invalid pipe %d\n", pipe); 4907 return; 4908 } 4909 } else { 4910 DRM_DEBUG("invalid me %d\n", me); 4911 return; 4912 } 4913 4914 switch (state) { 4915 case AMDGPU_IRQ_STATE_DISABLE: 4916 mec_int_cntl = RREG32(mec_int_cntl_reg); 4917 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4918 TIME_STAMP_INT_ENABLE, 0); 4919 WREG32(mec_int_cntl_reg, mec_int_cntl); 4920 break; 4921 case AMDGPU_IRQ_STATE_ENABLE: 4922 mec_int_cntl = RREG32(mec_int_cntl_reg); 4923 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4924 TIME_STAMP_INT_ENABLE, 1); 4925 WREG32(mec_int_cntl_reg, mec_int_cntl); 4926 break; 4927 default: 4928 break; 4929 } 4930 } 4931 4932 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 4933 struct amdgpu_irq_src *source, 4934 unsigned type, 4935 enum amdgpu_interrupt_state state) 4936 { 4937 u32 cp_int_cntl; 4938 4939 switch (state) { 4940 case AMDGPU_IRQ_STATE_DISABLE: 4941 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 4942 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 4943 PRIV_REG_INT_ENABLE, 0); 4944 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 4945 break; 4946 case AMDGPU_IRQ_STATE_ENABLE: 4947 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 4948 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 4949 PRIV_REG_INT_ENABLE, 0); 4950 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 4951 break; 4952 default: 4953 break; 4954 } 4955 4956 return 0; 4957 } 4958 4959 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 4960 struct amdgpu_irq_src *source, 4961 unsigned type, 4962 enum amdgpu_interrupt_state state) 4963 { 4964 u32 cp_int_cntl; 4965 4966 switch (state) { 4967 case AMDGPU_IRQ_STATE_DISABLE: 4968 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 4969 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 4970 
PRIV_INSTR_INT_ENABLE, 0); 4971 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 4972 break; 4973 case AMDGPU_IRQ_STATE_ENABLE: 4974 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 4975 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 4976 PRIV_INSTR_INT_ENABLE, 1); 4977 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 4978 break; 4979 default: 4980 break; 4981 } 4982 4983 return 0; 4984 } 4985 4986 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev, 4987 struct amdgpu_irq_src *src, 4988 unsigned type, 4989 enum amdgpu_interrupt_state state) 4990 { 4991 switch (type) { 4992 case AMDGPU_CP_IRQ_GFX_EOP: 4993 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state); 4994 break; 4995 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 4996 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 4997 break; 4998 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 4999 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5000 break; 5001 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5002 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5003 break; 5004 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5005 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5006 break; 5007 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5008 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5009 break; 5010 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5011 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5012 break; 5013 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5014 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5015 break; 5016 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5017 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5018 break; 5019 default: 5020 break; 5021 } 5022 return 0; 5023 } 5024 5025 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, 5026 struct amdgpu_irq_src *source, 5027 struct amdgpu_iv_entry *entry) 5028 { 5029 int i; 5030 u8 me_id, pipe_id, queue_id; 5031 struct amdgpu_ring *ring; 5032 5033 DRM_DEBUG("IH: 
CP EOP\n"); 5034 me_id = (entry->ring_id & 0x0c) >> 2; 5035 pipe_id = (entry->ring_id & 0x03) >> 0; 5036 queue_id = (entry->ring_id & 0x70) >> 4; 5037 5038 switch (me_id) { 5039 case 0: 5040 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5041 break; 5042 case 1: 5043 case 2: 5044 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5045 ring = &adev->gfx.compute_ring[i]; 5046 /* Per-queue interrupt is supported for MEC starting from VI. 5047 * The interrupt can only be enabled/disabled per pipe instead of per queue. 5048 */ 5049 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 5050 amdgpu_fence_process(ring); 5051 } 5052 break; 5053 } 5054 return 0; 5055 } 5056 5057 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, 5058 struct amdgpu_irq_src *source, 5059 struct amdgpu_iv_entry *entry) 5060 { 5061 DRM_ERROR("Illegal register access in command stream\n"); 5062 schedule_work(&adev->reset_work); 5063 return 0; 5064 } 5065 5066 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, 5067 struct amdgpu_irq_src *source, 5068 struct amdgpu_iv_entry *entry) 5069 { 5070 DRM_ERROR("Illegal instruction in command stream\n"); 5071 schedule_work(&adev->reset_work); 5072 return 0; 5073 } 5074 5075 const struct amd_ip_funcs gfx_v8_0_ip_funcs = { 5076 .early_init = gfx_v8_0_early_init, 5077 .late_init = gfx_v8_0_late_init, 5078 .sw_init = gfx_v8_0_sw_init, 5079 .sw_fini = gfx_v8_0_sw_fini, 5080 .hw_init = gfx_v8_0_hw_init, 5081 .hw_fini = gfx_v8_0_hw_fini, 5082 .suspend = gfx_v8_0_suspend, 5083 .resume = gfx_v8_0_resume, 5084 .is_idle = gfx_v8_0_is_idle, 5085 .wait_for_idle = gfx_v8_0_wait_for_idle, 5086 .soft_reset = gfx_v8_0_soft_reset, 5087 .print_status = gfx_v8_0_print_status, 5088 .set_clockgating_state = gfx_v8_0_set_clockgating_state, 5089 .set_powergating_state = gfx_v8_0_set_powergating_state, 5090 }; 5091 5092 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 5093 .get_rptr = gfx_v8_0_ring_get_rptr_gfx, 5094 
.get_wptr = gfx_v8_0_ring_get_wptr_gfx, 5095 .set_wptr = gfx_v8_0_ring_set_wptr_gfx, 5096 .parse_cs = NULL, 5097 .emit_ib = gfx_v8_0_ring_emit_ib_gfx, 5098 .emit_fence = gfx_v8_0_ring_emit_fence_gfx, 5099 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 5100 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 5101 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 5102 .test_ring = gfx_v8_0_ring_test_ring, 5103 .test_ib = gfx_v8_0_ring_test_ib, 5104 .insert_nop = amdgpu_ring_insert_nop, 5105 .pad_ib = amdgpu_ring_generic_pad_ib, 5106 }; 5107 5108 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 5109 .get_rptr = gfx_v8_0_ring_get_rptr_compute, 5110 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 5111 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 5112 .parse_cs = NULL, 5113 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 5114 .emit_fence = gfx_v8_0_ring_emit_fence_compute, 5115 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 5116 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 5117 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 5118 .test_ring = gfx_v8_0_ring_test_ring, 5119 .test_ib = gfx_v8_0_ring_test_ib, 5120 .insert_nop = amdgpu_ring_insert_nop, 5121 .pad_ib = amdgpu_ring_generic_pad_ib, 5122 }; 5123 5124 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) 5125 { 5126 int i; 5127 5128 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 5129 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx; 5130 5131 for (i = 0; i < adev->gfx.num_compute_rings; i++) 5132 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute; 5133 } 5134 5135 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = { 5136 .set = gfx_v8_0_set_eop_interrupt_state, 5137 .process = gfx_v8_0_eop_irq, 5138 }; 5139 5140 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = { 5141 .set = gfx_v8_0_set_priv_reg_fault_state, 5142 .process = gfx_v8_0_priv_reg_irq, 5143 }; 5144 5145 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = { 5146 
.set = gfx_v8_0_set_priv_inst_fault_state, 5147 .process = gfx_v8_0_priv_inst_irq, 5148 }; 5149 5150 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) 5151 { 5152 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 5153 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs; 5154 5155 adev->gfx.priv_reg_irq.num_types = 1; 5156 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs; 5157 5158 adev->gfx.priv_inst_irq.num_types = 1; 5159 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; 5160 } 5161 5162 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) 5163 { 5164 /* init asci gds info */ 5165 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); 5166 adev->gds.gws.total_size = 64; 5167 adev->gds.oa.total_size = 16; 5168 5169 if (adev->gds.mem.total_size == 64 * 1024) { 5170 adev->gds.mem.gfx_partition_size = 4096; 5171 adev->gds.mem.cs_partition_size = 4096; 5172 5173 adev->gds.gws.gfx_partition_size = 4; 5174 adev->gds.gws.cs_partition_size = 4; 5175 5176 adev->gds.oa.gfx_partition_size = 4; 5177 adev->gds.oa.cs_partition_size = 1; 5178 } else { 5179 adev->gds.mem.gfx_partition_size = 1024; 5180 adev->gds.mem.cs_partition_size = 1024; 5181 5182 adev->gds.gws.gfx_partition_size = 16; 5183 adev->gds.gws.cs_partition_size = 16; 5184 5185 adev->gds.oa.gfx_partition_size = 4; 5186 adev->gds.oa.cs_partition_size = 4; 5187 } 5188 } 5189 5190 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev, 5191 u32 se, u32 sh) 5192 { 5193 u32 mask = 0, tmp, tmp1; 5194 int i; 5195 5196 gfx_v8_0_select_se_sh(adev, se, sh); 5197 tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); 5198 tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); 5199 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); 5200 5201 tmp &= 0xffff0000; 5202 5203 tmp |= tmp1; 5204 tmp >>= 16; 5205 5206 for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) { 5207 mask <<= 1; 5208 mask |= 1; 5209 } 5210 5211 return (~tmp) & mask; 5212 } 5213 5214 int gfx_v8_0_get_cu_info(struct 
amdgpu_device *adev, 5215 struct amdgpu_cu_info *cu_info) 5216 { 5217 int i, j, k, counter, active_cu_number = 0; 5218 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 5219 5220 if (!adev || !cu_info) 5221 return -EINVAL; 5222 5223 mutex_lock(&adev->grbm_idx_mutex); 5224 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 5225 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 5226 mask = 1; 5227 ao_bitmap = 0; 5228 counter = 0; 5229 bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j); 5230 cu_info->bitmap[i][j] = bitmap; 5231 5232 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 5233 if (bitmap & mask) { 5234 if (counter < 2) 5235 ao_bitmap |= mask; 5236 counter ++; 5237 } 5238 mask <<= 1; 5239 } 5240 active_cu_number += counter; 5241 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 5242 } 5243 } 5244 5245 cu_info->number = active_cu_number; 5246 cu_info->ao_cu_mask = ao_cu_mask; 5247 mutex_unlock(&adev->grbm_idx_mutex); 5248 return 0; 5249 } 5250