/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"

#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "ivsrcid/sdma0/irqsrcs_sdma0_5_0.h"
#include "ivsrcid/sdma1/irqsrcs_sdma1_5_0.h"

#include "soc15_common.h"
#include "soc15.h"
#include "navi10_sdma_pkt_open.h"
#include "nbio_v2_3.h"
#include "sdma_common.h"
#include "sdma_v5_0.h"

MODULE_FIRMWARE("amdgpu/navi10_sdma.bin");
MODULE_FIRMWARE("amdgpu/navi10_sdma1.bin");

MODULE_FIRMWARE("amdgpu/navi14_sdma.bin");
MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin");

MODULE_FIRMWARE("amdgpu/navi12_sdma.bin");
MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin");

MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin");

#define SDMA1_REG_OFFSET 0x600
#define SDMA0_HYP_DEC_REG_START 0x5880
#define SDMA0_HYP_DEC_REG_END 0x5893
#define SDMA1_HYP_DEC_REG_OFFSET 0x20

static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev);

static const struct soc15_reg_golden golden_settings_sdma_5[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
	SOC15_REG_GOLDEN_VALUE(GC, 0,
mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 79 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_UTCL1_PAGE, 0x00ffffff, 0x000c5c00), 80 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), 81 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 82 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 83 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 84 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 85 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 86 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 87 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 88 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 89 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 90 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 91 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_UTCL1_PAGE, 0x00ffffff, 0x000c5c00) 92 }; 93 94 static const struct soc15_reg_golden golden_settings_sdma_5_sriov[] = { 95 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 96 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 97 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 98 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 99 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 100 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 101 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 102 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 103 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 104 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 105 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 106 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 107 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 108 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 109 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 110 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 111 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 112 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 113 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 114 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 115 }; 116 117 static const struct soc15_reg_golden golden_settings_sdma_nv10[] = { 118 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), 119 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), 120 }; 121 122 static const struct soc15_reg_golden golden_settings_sdma_nv14[] = { 123 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 124 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 125 }; 126 127 static const struct soc15_reg_golden golden_settings_sdma_nv12[] = { 128 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 129 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG, 0x001877ff, 0x00000044), 130 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044), 131 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG, 0x001877ff, 0x00000044), 132 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044), 133 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 134 }; 135 136 static const struct soc15_reg_golden golden_settings_sdma_cyan_skillfish[] = { 137 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), 138 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG, 0x001877ff, 0x00000044), 139 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044), 140 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 141 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 142 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 143 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 144 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 145 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 146 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 147 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 148 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 149 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 150 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_UTCL1_PAGE, 0x007fffff, 0x004c5c00), 151 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), 152 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG, 0x001877ff, 0x00000044), 153 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044), 154 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 155 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 156 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 157 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 158 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 159 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 160 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 161 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 162 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 163 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), 164 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_UTCL1_PAGE, 0x007fffff, 0x004c5c00) 165 }; 166 167 static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) 168 { 169 u32 base; 170 171 if (internal_offset >= SDMA0_HYP_DEC_REG_START && 172 internal_offset <= SDMA0_HYP_DEC_REG_END) { 173 base = adev->reg_offset[GC_HWIP][0][1]; 
		if (instance == 1)
			internal_offset += SDMA1_HYP_DEC_REG_OFFSET;
	} else {
		base = adev->reg_offset[GC_HWIP][0][0];
		if (instance == 1)
			internal_offset += SDMA1_REG_OFFSET;
	}

	return base + internal_offset;
}
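/*
 * Usage note (added for clarity, not from the original sources): callers
 * always name the SDMA0-relative register and select the instance through
 * this helper, roughly:
 *
 *	u32 reg = sdma_v5_0_get_reg_offset(adev, 1, mmSDMA0_GFX_RB_CNTL);
 *	u32 val = RREG32(reg);
 *
 * Instance 1 registers sit SDMA1_REG_OFFSET (0x600) above the instance 0
 * aperture, except for the HYP_DEC range, which uses the smaller
 * SDMA1_HYP_DEC_REG_OFFSET (0x20) stride instead.
 */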
static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->ip_versions[SDMA0_HWIP][0]) {
	case IP_VERSION(5, 0, 0):
		soc15_program_register_sequence(adev,
						golden_settings_sdma_5,
						(const u32)ARRAY_SIZE(golden_settings_sdma_5));
		soc15_program_register_sequence(adev,
						golden_settings_sdma_nv10,
						(const u32)ARRAY_SIZE(golden_settings_sdma_nv10));
		break;
	case IP_VERSION(5, 0, 2):
		soc15_program_register_sequence(adev,
						golden_settings_sdma_5,
						(const u32)ARRAY_SIZE(golden_settings_sdma_5));
		soc15_program_register_sequence(adev,
						golden_settings_sdma_nv14,
						(const u32)ARRAY_SIZE(golden_settings_sdma_nv14));
		break;
	case IP_VERSION(5, 0, 5):
		if (amdgpu_sriov_vf(adev))
			soc15_program_register_sequence(adev,
							golden_settings_sdma_5_sriov,
							(const u32)ARRAY_SIZE(golden_settings_sdma_5_sriov));
		else
			soc15_program_register_sequence(adev,
							golden_settings_sdma_5,
							(const u32)ARRAY_SIZE(golden_settings_sdma_5));
		soc15_program_register_sequence(adev,
						golden_settings_sdma_nv12,
						(const u32)ARRAY_SIZE(golden_settings_sdma_nv12));
		break;
	case IP_VERSION(5, 0, 1):
		soc15_program_register_sequence(adev,
						golden_settings_sdma_cyan_skillfish,
						(const u32)ARRAY_SIZE(golden_settings_sdma_cyan_skillfish));
		break;
	default:
		break;
	}
}

/**
 * sdma_v5_0_init_microcode - load ucode images from disk
 *
 * @adev: amdgpu_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */

// Emulation only; this won't work on a real chip.
// A real Navi10 chip needs to use PSP to load the firmware.
static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[40];
	int err = 0, i;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct sdma_firmware_header_v1_0 *hdr;

	if (amdgpu_sriov_vf(adev) && (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 0, 5)))
		return 0;

	DRM_DEBUG("\n");

	switch (adev->ip_versions[SDMA0_HWIP][0]) {
	case IP_VERSION(5, 0, 0):
		chip_name = "navi10";
		break;
	case IP_VERSION(5, 0, 2):
		chip_name = "navi14";
		break;
	case IP_VERSION(5, 0, 5):
		chip_name = "navi12";
		break;
	case IP_VERSION(5, 0, 1):
		chip_name = "cyan_skillfish2";
		break;
	default:
		BUG();
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (i == 0)
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
		else
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
		err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
		if (err)
			goto out;
		err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
		if (err)
			goto out;
		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
		adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
		adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
		if (adev->sdma.instance[i].feature_version >= 20)
			adev->sdma.instance[i].burst_nop = true;
		DRM_DEBUG("psp_load == '%s'\n",
			  adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
			info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
			info->fw = adev->sdma.instance[i].fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}
out:
	if (err) {
		DRM_ERROR("sdma_v5_0: Failed to load firmware \"%s\"\n", fw_name);
		for (i = 0; i < adev->sdma.num_instances; i++) {
			release_firmware(adev->sdma.instance[i].fw);
			adev->sdma.instance[i].fw = NULL;
		}
	}
	return err;
}

static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 1);
	ret = ring->wptr & ring->buf_mask; /* this is the offset we need to patch later */
	amdgpu_ring_write(ring, 0x55aa55aa); /* insert a dummy value here and patch it later */

	return ret;
}

static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
					   unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr - 1) & ring->buf_mask;
	if (cur > offset)
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
}

/**
 * sdma_v5_0_ring_get_rptr - get the current read pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current rptr from the hardware (NAVI10+).
344 */ 345 static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring) 346 { 347 u64 *rptr; 348 349 /* XXX check if swapping is necessary on BE */ 350 rptr = (u64 *)ring->rptr_cpu_addr; 351 352 DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr); 353 return ((*rptr) >> 2); 354 } 355 356 /** 357 * sdma_v5_0_ring_get_wptr - get the current write pointer 358 * 359 * @ring: amdgpu ring pointer 360 * 361 * Get the current wptr from the hardware (NAVI10+). 362 */ 363 static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) 364 { 365 struct amdgpu_device *adev = ring->adev; 366 u64 wptr; 367 368 if (ring->use_doorbell) { 369 /* XXX check if swapping is necessary on BE */ 370 wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr)); 371 DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); 372 } else { 373 wptr = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)); 374 wptr = wptr << 32; 375 wptr |= RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)); 376 DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr); 377 } 378 379 return wptr >> 2; 380 } 381 382 /** 383 * sdma_v5_0_ring_set_wptr - commit the write pointer 384 * 385 * @ring: amdgpu ring pointer 386 * 387 * Write the wptr back to the hardware (NAVI10+). 388 */ 389 static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) 390 { 391 struct amdgpu_device *adev = ring->adev; 392 uint32_t *wptr_saved; 393 uint32_t *is_queue_unmap; 394 uint64_t aggregated_db_index; 395 uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; 396 397 DRM_DEBUG("Setting write pointer\n"); 398 if (ring->is_mes_queue) { 399 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); 400 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + 401 sizeof(uint32_t)); 402 aggregated_db_index = 403 amdgpu_mes_get_aggregated_doorbell_index(adev, 404 AMDGPU_MES_PRIORITY_LEVEL_NORMAL); 405 406 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 407 ring->wptr << 2); 408 *wptr_saved = ring->wptr << 2; 409 if (*is_queue_unmap) { 410 WDOORBELL64(aggregated_db_index, ring->wptr << 2); 411 DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", 412 ring->doorbell_index, ring->wptr << 2); 413 WDOORBELL64(ring->doorbell_index, ring->wptr << 2); 414 } else { 415 DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", 416 ring->doorbell_index, ring->wptr << 2); 417 WDOORBELL64(ring->doorbell_index, ring->wptr << 2); 418 419 if (*is_queue_unmap) 420 WDOORBELL64(aggregated_db_index, 421 ring->wptr << 2); 422 } 423 } else { 424 if (ring->use_doorbell) { 425 DRM_DEBUG("Using doorbell -- " 426 "wptr_offs == 0x%08x " 427 "lower_32_bits(ring->wptr) << 2 == 0x%08x " 428 "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", 429 ring->wptr_offs, 430 lower_32_bits(ring->wptr << 2), 431 upper_32_bits(ring->wptr << 2)); 432 /* XXX check if swapping is necessary on BE */ 433 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 434 ring->wptr << 2); 435 DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", 436 ring->doorbell_index, ring->wptr << 2); 437 WDOORBELL64(ring->doorbell_index, ring->wptr << 2); 438 } else { 439 DRM_DEBUG("Not using doorbell -- " 440 "mmSDMA%i_GFX_RB_WPTR == 0x%08x " 441 "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", 442 ring->me, 443 lower_32_bits(ring->wptr << 2), 444 ring->me, 445 upper_32_bits(ring->wptr << 2)); 446 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, 447 ring->me, mmSDMA0_GFX_RB_WPTR), 448 lower_32_bits(ring->wptr << 2)); 449 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, 450 
ring->me, mmSDMA0_GFX_RB_WPTR_HI), 451 upper_32_bits(ring->wptr << 2)); 452 } 453 } 454 } 455 456 static void sdma_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 457 { 458 struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); 459 int i; 460 461 for (i = 0; i < count; i++) 462 if (sdma && sdma->burst_nop && (i == 0)) 463 amdgpu_ring_write(ring, ring->funcs->nop | 464 SDMA_PKT_NOP_HEADER_COUNT(count - 1)); 465 else 466 amdgpu_ring_write(ring, ring->funcs->nop); 467 } 468 469 /** 470 * sdma_v5_0_ring_emit_ib - Schedule an IB on the DMA engine 471 * 472 * @ring: amdgpu ring pointer 473 * @job: job to retrieve vmid from 474 * @ib: IB object to schedule 475 * @flags: unused 476 * 477 * Schedule an IB in the DMA ring (NAVI10). 478 */ 479 static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring, 480 struct amdgpu_job *job, 481 struct amdgpu_ib *ib, 482 uint32_t flags) 483 { 484 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 485 uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid); 486 487 /* An IB packet must end on a 8 DW boundary--the next dword 488 * must be on a 8-dword boundary. Our IB packet below is 6 489 * dwords long, thus add x number of NOPs, such that, in 490 * modular arithmetic, 491 * wptr + 6 + x = 8k, k >= 0, which in C is, 492 * (wptr + 6 + x) % 8 = 0. 493 * The expression below, is a solution of x. 494 */ 495 sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); 496 497 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | 498 SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); 499 /* base must be 32 byte aligned */ 500 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); 501 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 502 amdgpu_ring_write(ring, ib->length_dw); 503 amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr)); 504 amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr)); 505 } 506 507 /** 508 * sdma_v5_0_ring_emit_mem_sync - flush the IB by graphics cache rinse 509 * 510 * @ring: amdgpu ring pointer 511 * 512 * flush the IB by graphics cache rinse. 513 */ 514 static void sdma_v5_0_ring_emit_mem_sync(struct amdgpu_ring *ring) 515 { 516 uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV | 517 SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV | 518 SDMA_GCR_GLI_INV(1); 519 520 /* flush entire cache L0/L1/L2, this can be optimized by performance requirement */ 521 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ)); 522 amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0)); 523 amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) | 524 SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0)); 525 amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) | 526 SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16)); 527 amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) | 528 SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0)); 529 } 530 531 /** 532 * sdma_v5_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring 533 * 534 * @ring: amdgpu ring pointer 535 * 536 * Emit an hdp flush packet on the requested DMA ring. 
537 */ 538 static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 539 { 540 struct amdgpu_device *adev = ring->adev; 541 u32 ref_and_mask = 0; 542 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 543 544 if (ring->me == 0) 545 ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; 546 else 547 ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; 548 549 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 550 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | 551 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ 552 amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); 553 amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); 554 amdgpu_ring_write(ring, ref_and_mask); /* reference */ 555 amdgpu_ring_write(ring, ref_and_mask); /* mask */ 556 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 557 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ 558 } 559 560 /** 561 * sdma_v5_0_ring_emit_fence - emit a fence on the DMA ring 562 * 563 * @ring: amdgpu ring pointer 564 * @addr: address 565 * @seq: sequence number 566 * @flags: fence related flags 567 * 568 * Add a DMA fence packet to the ring to write 569 * the fence seq number and DMA trap packet to generate 570 * an interrupt if needed (NAVI10). 571 */ 572 static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, 573 unsigned flags) 574 { 575 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 576 /* write the fence */ 577 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | 578 SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */ 579 /* zero in first two bits */ 580 BUG_ON(addr & 0x3); 581 amdgpu_ring_write(ring, lower_32_bits(addr)); 582 amdgpu_ring_write(ring, upper_32_bits(addr)); 583 amdgpu_ring_write(ring, lower_32_bits(seq)); 584 585 /* optionally write high bits as well */ 586 if (write64bit) { 587 addr += 4; 588 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | 589 SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); 590 /* zero in first two bits */ 591 BUG_ON(addr & 0x3); 592 amdgpu_ring_write(ring, lower_32_bits(addr)); 593 amdgpu_ring_write(ring, upper_32_bits(addr)); 594 amdgpu_ring_write(ring, upper_32_bits(seq)); 595 } 596 597 if (flags & AMDGPU_FENCE_FLAG_INT) { 598 uint32_t ctx = ring->is_mes_queue ? 599 (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; 600 /* generate an interrupt */ 601 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); 602 amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); 603 } 604 } 605 606 607 /** 608 * sdma_v5_0_gfx_stop - stop the gfx async dma engines 609 * 610 * @adev: amdgpu_device pointer 611 * 612 * Stop the gfx async dma ring buffers (NAVI10). 
613 */ 614 static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev) 615 { 616 struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; 617 struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; 618 u32 rb_cntl, ib_cntl; 619 int i; 620 621 if ((adev->mman.buffer_funcs_ring == sdma0) || 622 (adev->mman.buffer_funcs_ring == sdma1)) 623 amdgpu_ttm_set_buffer_funcs_status(adev, false); 624 625 for (i = 0; i < adev->sdma.num_instances; i++) { 626 rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); 627 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); 628 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); 629 ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); 630 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); 631 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); 632 } 633 } 634 635 /** 636 * sdma_v5_0_rlc_stop - stop the compute async dma engines 637 * 638 * @adev: amdgpu_device pointer 639 * 640 * Stop the compute async dma queues (NAVI10). 641 */ 642 static void sdma_v5_0_rlc_stop(struct amdgpu_device *adev) 643 { 644 /* XXX todo */ 645 } 646 647 /** 648 * sdma_v5_0_ctx_switch_enable - stop the async dma engines context switch 649 * 650 * @adev: amdgpu_device pointer 651 * @enable: enable/disable the DMA MEs context switch. 652 * 653 * Halt or unhalt the async dma engines context switch (NAVI10). 654 */ 655 static void sdma_v5_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) 656 { 657 u32 f32_cntl = 0, phase_quantum = 0; 658 int i; 659 660 if (amdgpu_sdma_phase_quantum) { 661 unsigned value = amdgpu_sdma_phase_quantum; 662 unsigned unit = 0; 663 664 while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> 665 SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) { 666 value = (value + 1) >> 1; 667 unit++; 668 } 669 if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> 670 SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) { 671 value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> 672 SDMA0_PHASE0_QUANTUM__VALUE__SHIFT); 673 unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> 674 SDMA0_PHASE0_QUANTUM__UNIT__SHIFT); 675 WARN_ONCE(1, 676 "clamping sdma_phase_quantum to %uK clock cycles\n", 677 value << unit); 678 } 679 phase_quantum = 680 value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT | 681 unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT; 682 } 683 684 for (i = 0; i < adev->sdma.num_instances; i++) { 685 if (!amdgpu_sriov_vf(adev)) { 686 f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); 687 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, 688 AUTO_CTXSW_ENABLE, enable ? 1 : 0); 689 } 690 691 if (enable && amdgpu_sdma_phase_quantum) { 692 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), 693 phase_quantum); 694 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), 695 phase_quantum); 696 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), 697 phase_quantum); 698 } 699 if (!amdgpu_sriov_vf(adev)) 700 WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); 701 } 702 703 } 704 705 /** 706 * sdma_v5_0_enable - stop the async dma engines 707 * 708 * @adev: amdgpu_device pointer 709 * @enable: enable/disable the DMA MEs. 710 * 711 * Halt or unhalt the async dma engines (NAVI10). 
712 */ 713 static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable) 714 { 715 u32 f32_cntl; 716 int i; 717 718 if (!enable) { 719 sdma_v5_0_gfx_stop(adev); 720 sdma_v5_0_rlc_stop(adev); 721 } 722 723 if (amdgpu_sriov_vf(adev)) 724 return; 725 726 for (i = 0; i < adev->sdma.num_instances; i++) { 727 f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); 728 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); 729 WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); 730 } 731 } 732 733 /** 734 * sdma_v5_0_gfx_resume - setup and start the async dma engines 735 * 736 * @adev: amdgpu_device pointer 737 * 738 * Set up the gfx DMA ring buffers and enable them (NAVI10). 739 * Returns 0 for success, error for failure. 740 */ 741 static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) 742 { 743 struct amdgpu_ring *ring; 744 u32 rb_cntl, ib_cntl; 745 u32 rb_bufsz; 746 u32 doorbell; 747 u32 doorbell_offset; 748 u32 temp; 749 u32 wptr_poll_cntl; 750 u64 wptr_gpu_addr; 751 int i, r; 752 753 for (i = 0; i < adev->sdma.num_instances; i++) { 754 ring = &adev->sdma.instance[i].ring; 755 756 if (!amdgpu_sriov_vf(adev)) 757 WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); 758 759 /* Set ring buffer size in dwords */ 760 rb_bufsz = order_base_2(ring->ring_size / 4); 761 rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); 762 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); 763 #ifdef __BIG_ENDIAN 764 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); 765 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, 766 RPTR_WRITEBACK_SWAP_ENABLE, 1); 767 #endif 768 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); 769 770 /* Initialize the ring buffer's read and write pointers */ 771 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); 772 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); 773 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); 774 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); 775 776 /* setup the wptr shadow polling */ 777 wptr_gpu_addr = ring->wptr_gpu_addr; 778 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), 779 lower_32_bits(wptr_gpu_addr)); 780 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), 781 upper_32_bits(wptr_gpu_addr)); 782 wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, 783 mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); 784 wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, 785 SDMA0_GFX_RB_WPTR_POLL_CNTL, 786 F32_POLL_ENABLE, 1); 787 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 788 wptr_poll_cntl); 789 790 /* set the wb address whether it's enabled or not */ 791 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), 792 upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); 793 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), 794 lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); 795 796 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); 797 798 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), 799 ring->gpu_addr >> 8); 800 WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), 801 
				ring->gpu_addr >> 40);

		ring->wptr = 0;

		/* before programming wptr to a smaller value, minor_ptr_update must be set first */
		WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);

		if (!amdgpu_sriov_vf(adev)) { /* only bare-metal uses register writes for wptr */
			WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
			       lower_32_bits(ring->wptr << 2));
			WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
			       upper_32_bits(ring->wptr << 2));
		}

		doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
		doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
						  mmSDMA0_GFX_DOORBELL_OFFSET));

		if (ring->use_doorbell) {
			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
			doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
							OFFSET, ring->doorbell_index);
		} else {
			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
		}
		WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
		WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET),
				doorbell_offset);

		adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
						      ring->doorbell_index, 20);

		if (amdgpu_sriov_vf(adev))
			sdma_v5_0_ring_set_wptr(ring);

		/* set minor_ptr_update to 0 after wptr is programmed */
		WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);

		if (!amdgpu_sriov_vf(adev)) {
			/* set utc l1 enable flag always to 1 */
			temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
			temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);

			/* enable MCBP */
			temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
			WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);

			/* Set up RESP_MODE to non-copy addresses */
			temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
			temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
			temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
			WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);

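			/*
			 * Note (added for clarity): per the shifts used below,
			 * bits [13:12] of SDMA0_UTCL1_PAGE appear to select the
			 * L2 read policy and bits [15:14] the L2 write policy;
			 * the 0xFF0FFF mask clears those fields before the
			 * default policies are OR'ed back in.
			 */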
			/* program default cache read and write policy */
			temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
			/* clean read policy and write policy bits */
			temp &= 0xFF0FFF;
			temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
			WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
		}

		if (!amdgpu_sriov_vf(adev)) {
			/* unhalt engine */
			temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
			WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
		}

		/* enable DMA RB */
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
		WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);

		ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
		/* enable DMA IBs */
		WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);

		ring->sched.ready = true;

		if (amdgpu_sriov_vf(adev)) { /* the bare-metal sequence doesn't need the two lines below */
			sdma_v5_0_ctx_switch_enable(adev, true);
			sdma_v5_0_enable(adev, true);
		}

		r = amdgpu_ring_test_helper(ring);
		if (r)
			return r;

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_buffer_funcs_status(adev, true);
	}

	return 0;
}

/**
 * sdma_v5_0_rlc_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the compute DMA queues and enable them (NAVI10).
 * Returns 0 for success, error for failure.
 */
static int sdma_v5_0_rlc_resume(struct amdgpu_device *adev)
{
	return 0;
}

/**
 * sdma_v5_0_load_microcode - load the sDMA ME ucode
 *
 * @adev: amdgpu_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int sdma_v5_0_load_microcode(struct amdgpu_device *adev)
{
	const struct sdma_firmware_header_v1_0 *hdr;
	const __le32 *fw_data;
	u32 fw_size;
	int i, j;

	/* halt the MEs */
	sdma_v5_0_enable(adev, false);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (!adev->sdma.instance[i].fw)
			return -EINVAL;

		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
		amdgpu_ucode_print_sdma_hdr(&hdr->header);
		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

		fw_data = (const __le32 *)
			(adev->sdma.instance[i].fw->data +
			 le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);

		for (j = 0; j < fw_size; j++) {
			if (amdgpu_emu_mode == 1 && j % 500 == 0)
				msleep(1);
			WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
		}

		WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
	}

	return 0;
}
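/*
 * Note (added for clarity): the register-based load above is only exercised
 * when adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT (see
 * sdma_v5_0_start() below), e.g. on emulation; production Navi1x parts load
 * the SDMA ucode through PSP instead, as noted above
 * sdma_v5_0_init_microcode().
 */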
/**
 * sdma_v5_0_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the DMA engines and enable them (NAVI10).
 * Returns 0 for success, error for failure.
 */
static int sdma_v5_0_start(struct amdgpu_device *adev)
{
	int r = 0;

	if (amdgpu_sriov_vf(adev)) {
		sdma_v5_0_ctx_switch_enable(adev, false);
		sdma_v5_0_enable(adev, false);

		/* set RB registers */
		r = sdma_v5_0_gfx_resume(adev);
		return r;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		r = sdma_v5_0_load_microcode(adev);
		if (r)
			return r;
	}

	/* unhalt the MEs */
	sdma_v5_0_enable(adev, true);
	/* enable sdma ring preemption */
	sdma_v5_0_ctx_switch_enable(adev, true);

	/* start the gfx rings and rlc compute queues */
	r = sdma_v5_0_gfx_resume(adev);
	if (r)
		return r;
	r = sdma_v5_0_rlc_resume(adev);

	return r;
}

static int sdma_v5_0_mqd_init(struct amdgpu_device *adev, void *mqd,
			      struct amdgpu_mqd_prop *prop)
{
	struct v10_sdma_mqd *m = mqd;
	uint64_t wb_gpu_addr;

	m->sdmax_rlcx_rb_cntl =
		order_base_2(prop->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
		1 << SDMA0_RLC0_RB_CNTL__RB_PRIV__SHIFT;

	m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);

	m->sdmax_rlcx_rb_wptr_poll_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, 0,
						 mmSDMA0_GFX_RB_WPTR_POLL_CNTL));

	wb_gpu_addr = prop->wptr_gpu_addr;
	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);

	wb_gpu_addr = prop->rptr_gpu_addr;
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);

	m->sdmax_rlcx_ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, 0,
				       mmSDMA0_GFX_IB_CNTL));

	m->sdmax_rlcx_doorbell_offset =
		prop->doorbell_index << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;

	m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_RLC0_DOORBELL, ENABLE, 1);

	return 0;
}

static void sdma_v5_0_set_mqd_funcs(struct amdgpu_device *adev)
{
	adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v10_sdma_mqd);
	adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v5_0_mqd_init;
}

/**
 * sdma_v5_0_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (NAVI10).
 * Returns 0 for success, error for failure.
1048 */ 1049 static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) 1050 { 1051 struct amdgpu_device *adev = ring->adev; 1052 unsigned i; 1053 unsigned index; 1054 int r; 1055 u32 tmp; 1056 u64 gpu_addr; 1057 volatile uint32_t *cpu_ptr = NULL; 1058 1059 tmp = 0xCAFEDEAD; 1060 1061 if (ring->is_mes_queue) { 1062 uint32_t offset = 0; 1063 offset = amdgpu_mes_ctx_get_offs(ring, 1064 AMDGPU_MES_CTX_PADDING_OFFS); 1065 gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 1066 cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 1067 *cpu_ptr = tmp; 1068 } else { 1069 r = amdgpu_device_wb_get(adev, &index); 1070 if (r) { 1071 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); 1072 return r; 1073 } 1074 1075 gpu_addr = adev->wb.gpu_addr + (index * 4); 1076 adev->wb.wb[index] = cpu_to_le32(tmp); 1077 } 1078 1079 r = amdgpu_ring_alloc(ring, 20); 1080 if (r) { 1081 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); 1082 amdgpu_device_wb_free(adev, index); 1083 return r; 1084 } 1085 1086 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 1087 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); 1088 amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); 1089 amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); 1090 amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0)); 1091 amdgpu_ring_write(ring, 0xDEADBEEF); 1092 amdgpu_ring_commit(ring); 1093 1094 for (i = 0; i < adev->usec_timeout; i++) { 1095 if (ring->is_mes_queue) 1096 tmp = le32_to_cpu(*cpu_ptr); 1097 else 1098 tmp = le32_to_cpu(adev->wb.wb[index]); 1099 if (tmp == 0xDEADBEEF) 1100 break; 1101 if (amdgpu_emu_mode == 1) 1102 msleep(1); 1103 else 1104 udelay(1); 1105 } 1106 1107 if (i >= adev->usec_timeout) 1108 r = -ETIMEDOUT; 1109 1110 if (!ring->is_mes_queue) 1111 amdgpu_device_wb_free(adev, index); 1112 1113 return r; 1114 } 1115 1116 /** 1117 * sdma_v5_0_ring_test_ib - test an IB on the DMA engine 1118 * 1119 * @ring: amdgpu_ring structure holding ring information 1120 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT 1121 * 1122 * Test a simple IB in the DMA ring (NAVI10). 1123 * Returns 0 on success, error on failure. 
1124 */ 1125 static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 1126 { 1127 struct amdgpu_device *adev = ring->adev; 1128 struct amdgpu_ib ib; 1129 struct dma_fence *f = NULL; 1130 unsigned index; 1131 long r; 1132 u32 tmp = 0; 1133 u64 gpu_addr; 1134 volatile uint32_t *cpu_ptr = NULL; 1135 1136 tmp = 0xCAFEDEAD; 1137 memset(&ib, 0, sizeof(ib)); 1138 1139 if (ring->is_mes_queue) { 1140 uint32_t offset = 0; 1141 offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); 1142 ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 1143 ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 1144 1145 offset = amdgpu_mes_ctx_get_offs(ring, 1146 AMDGPU_MES_CTX_PADDING_OFFS); 1147 gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 1148 cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 1149 *cpu_ptr = tmp; 1150 } else { 1151 r = amdgpu_device_wb_get(adev, &index); 1152 if (r) { 1153 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); 1154 return r; 1155 } 1156 1157 gpu_addr = adev->wb.gpu_addr + (index * 4); 1158 adev->wb.wb[index] = cpu_to_le32(tmp); 1159 1160 r = amdgpu_ib_get(adev, NULL, 256, 1161 AMDGPU_IB_POOL_DIRECT, &ib); 1162 if (r) { 1163 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 1164 goto err0; 1165 } 1166 } 1167 1168 ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 1169 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 1170 ib.ptr[1] = lower_32_bits(gpu_addr); 1171 ib.ptr[2] = upper_32_bits(gpu_addr); 1172 ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0); 1173 ib.ptr[4] = 0xDEADBEEF; 1174 ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 1175 ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 1176 ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 1177 ib.length_dw = 8; 1178 1179 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 1180 if (r) 1181 goto err1; 1182 1183 r = dma_fence_wait_timeout(f, false, timeout); 1184 if (r == 0) { 1185 DRM_ERROR("amdgpu: IB test timed out\n"); 1186 r = -ETIMEDOUT; 1187 goto err1; 1188 } else if (r < 0) { 1189 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 1190 goto err1; 1191 } 1192 1193 if (ring->is_mes_queue) 1194 tmp = le32_to_cpu(*cpu_ptr); 1195 else 1196 tmp = le32_to_cpu(adev->wb.wb[index]); 1197 1198 if (tmp == 0xDEADBEEF) 1199 r = 0; 1200 else 1201 r = -EINVAL; 1202 1203 err1: 1204 amdgpu_ib_free(adev, &ib, NULL); 1205 dma_fence_put(f); 1206 err0: 1207 if (!ring->is_mes_queue) 1208 amdgpu_device_wb_free(adev, index); 1209 return r; 1210 } 1211 1212 1213 /** 1214 * sdma_v5_0_vm_copy_pte - update PTEs by copying them from the GART 1215 * 1216 * @ib: indirect buffer to fill with commands 1217 * @pe: addr of the page entry 1218 * @src: src addr to copy from 1219 * @count: number of page entries to update 1220 * 1221 * Update PTEs by copying them from the GART using sDMA (NAVI10). 
1222 */ 1223 static void sdma_v5_0_vm_copy_pte(struct amdgpu_ib *ib, 1224 uint64_t pe, uint64_t src, 1225 unsigned count) 1226 { 1227 unsigned bytes = count * 8; 1228 1229 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | 1230 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); 1231 ib->ptr[ib->length_dw++] = bytes - 1; 1232 ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 1233 ib->ptr[ib->length_dw++] = lower_32_bits(src); 1234 ib->ptr[ib->length_dw++] = upper_32_bits(src); 1235 ib->ptr[ib->length_dw++] = lower_32_bits(pe); 1236 ib->ptr[ib->length_dw++] = upper_32_bits(pe); 1237 1238 } 1239 1240 /** 1241 * sdma_v5_0_vm_write_pte - update PTEs by writing them manually 1242 * 1243 * @ib: indirect buffer to fill with commands 1244 * @pe: addr of the page entry 1245 * @value: dst addr to write into pe 1246 * @count: number of page entries to update 1247 * @incr: increase next addr by incr bytes 1248 * 1249 * Update PTEs by writing them manually using sDMA (NAVI10). 1250 */ 1251 static void sdma_v5_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, 1252 uint64_t value, unsigned count, 1253 uint32_t incr) 1254 { 1255 unsigned ndw = count * 2; 1256 1257 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 1258 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 1259 ib->ptr[ib->length_dw++] = lower_32_bits(pe); 1260 ib->ptr[ib->length_dw++] = upper_32_bits(pe); 1261 ib->ptr[ib->length_dw++] = ndw - 1; 1262 for (; ndw > 0; ndw -= 2) { 1263 ib->ptr[ib->length_dw++] = lower_32_bits(value); 1264 ib->ptr[ib->length_dw++] = upper_32_bits(value); 1265 value += incr; 1266 } 1267 } 1268 1269 /** 1270 * sdma_v5_0_vm_set_pte_pde - update the page tables using sDMA 1271 * 1272 * @ib: indirect buffer to fill with commands 1273 * @pe: addr of the page entry 1274 * @addr: dst addr to write into pe 1275 * @count: number of page entries to update 1276 * @incr: increase next addr by incr bytes 1277 * @flags: access flags 1278 * 1279 * Update the page tables using sDMA (NAVI10). 1280 */ 1281 static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib, 1282 uint64_t pe, 1283 uint64_t addr, unsigned count, 1284 uint32_t incr, uint64_t flags) 1285 { 1286 /* for physically contiguous pages (vram) */ 1287 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE); 1288 ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ 1289 ib->ptr[ib->length_dw++] = upper_32_bits(pe); 1290 ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ 1291 ib->ptr[ib->length_dw++] = upper_32_bits(flags); 1292 ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ 1293 ib->ptr[ib->length_dw++] = upper_32_bits(addr); 1294 ib->ptr[ib->length_dw++] = incr; /* increment size */ 1295 ib->ptr[ib->length_dw++] = 0; 1296 ib->ptr[ib->length_dw++] = count - 1; /* number of entries */ 1297 } 1298 1299 /** 1300 * sdma_v5_0_ring_pad_ib - pad the IB 1301 * @ring: amdgpu_ring structure holding ring information 1302 * @ib: indirect buffer to fill with padding 1303 * 1304 * Pad the IB with NOPs to a boundary multiple of 8. 
1305 */ 1306 static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) 1307 { 1308 struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); 1309 u32 pad_count; 1310 int i; 1311 1312 pad_count = (-ib->length_dw) & 0x7; 1313 for (i = 0; i < pad_count; i++) 1314 if (sdma && sdma->burst_nop && (i == 0)) 1315 ib->ptr[ib->length_dw++] = 1316 SDMA_PKT_HEADER_OP(SDMA_OP_NOP) | 1317 SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); 1318 else 1319 ib->ptr[ib->length_dw++] = 1320 SDMA_PKT_HEADER_OP(SDMA_OP_NOP); 1321 } 1322 1323 1324 /** 1325 * sdma_v5_0_ring_emit_pipeline_sync - sync the pipeline 1326 * 1327 * @ring: amdgpu_ring pointer 1328 * 1329 * Make sure all previous operations are completed (CIK). 1330 */ 1331 static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 1332 { 1333 uint32_t seq = ring->fence_drv.sync_seq; 1334 uint64_t addr = ring->fence_drv.gpu_addr; 1335 1336 /* wait for idle */ 1337 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 1338 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | 1339 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ 1340 SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1)); 1341 amdgpu_ring_write(ring, addr & 0xfffffffc); 1342 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 1343 amdgpu_ring_write(ring, seq); /* reference */ 1344 amdgpu_ring_write(ring, 0xffffffff); /* mask */ 1345 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 1346 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ 1347 } 1348 1349 1350 /** 1351 * sdma_v5_0_ring_emit_vm_flush - vm flush using sDMA 1352 * 1353 * @ring: amdgpu_ring pointer 1354 * @vmid: vmid number to use 1355 * @pd_addr: address 1356 * 1357 * Update the page table base and flush the VM TLB 1358 * using sDMA (NAVI10). 
1359 */ 1360 static void sdma_v5_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 1361 unsigned vmid, uint64_t pd_addr) 1362 { 1363 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 1364 } 1365 1366 static void sdma_v5_0_ring_emit_wreg(struct amdgpu_ring *ring, 1367 uint32_t reg, uint32_t val) 1368 { 1369 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 1370 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 1371 amdgpu_ring_write(ring, reg); 1372 amdgpu_ring_write(ring, val); 1373 } 1374 1375 static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 1376 uint32_t val, uint32_t mask) 1377 { 1378 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 1379 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | 1380 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ 1381 amdgpu_ring_write(ring, reg << 2); 1382 amdgpu_ring_write(ring, 0); 1383 amdgpu_ring_write(ring, val); /* reference */ 1384 amdgpu_ring_write(ring, mask); /* mask */ 1385 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 1386 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); 1387 } 1388 1389 static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 1390 uint32_t reg0, uint32_t reg1, 1391 uint32_t ref, uint32_t mask) 1392 { 1393 amdgpu_ring_emit_wreg(ring, reg0, ref); 1394 /* wait for a cycle to reset vm_inv_eng*_ack */ 1395 amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); 1396 amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); 1397 } 1398 1399 static int sdma_v5_0_early_init(void *handle) 1400 { 1401 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1402 1403 sdma_v5_0_set_ring_funcs(adev); 1404 sdma_v5_0_set_buffer_funcs(adev); 1405 sdma_v5_0_set_vm_pte_funcs(adev); 1406 sdma_v5_0_set_irq_funcs(adev); 1407 sdma_v5_0_set_mqd_funcs(adev); 1408 1409 return 0; 1410 } 1411 1412 1413 static int sdma_v5_0_sw_init(void *handle) 1414 { 1415 struct amdgpu_ring *ring; 1416 int r, i; 1417 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1418 1419 /* SDMA trap event */ 1420 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, 1421 SDMA0_5_0__SRCID__SDMA_TRAP, 1422 &adev->sdma.trap_irq); 1423 if (r) 1424 return r; 1425 1426 /* SDMA trap event */ 1427 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, 1428 SDMA1_5_0__SRCID__SDMA_TRAP, 1429 &adev->sdma.trap_irq); 1430 if (r) 1431 return r; 1432 1433 r = sdma_v5_0_init_microcode(adev); 1434 if (r) { 1435 DRM_ERROR("Failed to load sdma firmware!\n"); 1436 return r; 1437 } 1438 1439 for (i = 0; i < adev->sdma.num_instances; i++) { 1440 ring = &adev->sdma.instance[i].ring; 1441 ring->ring_obj = NULL; 1442 ring->use_doorbell = true; 1443 1444 DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i, 1445 ring->use_doorbell?"true":"false"); 1446 1447 ring->doorbell_index = (i == 0) ? 1448 (adev->doorbell_index.sdma_engine[0] << 1) //get DWORD offset 1449 : (adev->doorbell_index.sdma_engine[1] << 1); // get DWORD offset 1450 1451 sprintf(ring->name, "sdma%d", i); 1452 r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, 1453 (i == 0) ? 
AMDGPU_SDMA_IRQ_INSTANCE0 : 1454 AMDGPU_SDMA_IRQ_INSTANCE1, 1455 AMDGPU_RING_PRIO_DEFAULT, NULL); 1456 if (r) 1457 return r; 1458 } 1459 1460 return r; 1461 } 1462 1463 static int sdma_v5_0_sw_fini(void *handle) 1464 { 1465 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1466 int i; 1467 1468 for (i = 0; i < adev->sdma.num_instances; i++) { 1469 release_firmware(adev->sdma.instance[i].fw); 1470 adev->sdma.instance[i].fw = NULL; 1471 1472 amdgpu_ring_fini(&adev->sdma.instance[i].ring); 1473 } 1474 1475 return 0; 1476 } 1477 1478 static int sdma_v5_0_hw_init(void *handle) 1479 { 1480 int r; 1481 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1482 1483 sdma_v5_0_init_golden_registers(adev); 1484 1485 r = sdma_v5_0_start(adev); 1486 1487 return r; 1488 } 1489 1490 static int sdma_v5_0_hw_fini(void *handle) 1491 { 1492 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1493 1494 if (amdgpu_sriov_vf(adev)) 1495 return 0; 1496 1497 sdma_v5_0_ctx_switch_enable(adev, false); 1498 sdma_v5_0_enable(adev, false); 1499 1500 return 0; 1501 } 1502 1503 static int sdma_v5_0_suspend(void *handle) 1504 { 1505 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1506 1507 return sdma_v5_0_hw_fini(adev); 1508 } 1509 1510 static int sdma_v5_0_resume(void *handle) 1511 { 1512 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1513 1514 return sdma_v5_0_hw_init(adev); 1515 } 1516 1517 static bool sdma_v5_0_is_idle(void *handle) 1518 { 1519 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1520 u32 i; 1521 1522 for (i = 0; i < adev->sdma.num_instances; i++) { 1523 u32 tmp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG)); 1524 1525 if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) 1526 return false; 1527 } 1528 1529 return true; 1530 } 1531 1532 static int sdma_v5_0_wait_for_idle(void *handle) 1533 { 1534 unsigned i; 1535 u32 sdma0, sdma1; 1536 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1537 1538 for (i = 0; i < adev->usec_timeout; i++) { 1539 sdma0 = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG)); 1540 sdma1 = RREG32(sdma_v5_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG)); 1541 1542 if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) 1543 return 0; 1544 udelay(1); 1545 } 1546 return -ETIMEDOUT; 1547 } 1548 1549 static int sdma_v5_0_soft_reset(void *handle) 1550 { 1551 /* todo */ 1552 1553 return 0; 1554 } 1555 1556 static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring) 1557 { 1558 int i, r = 0; 1559 struct amdgpu_device *adev = ring->adev; 1560 u32 index = 0; 1561 u64 sdma_gfx_preempt; 1562 1563 amdgpu_sdma_get_index_from_ring(ring, &index); 1564 if (index == 0) 1565 sdma_gfx_preempt = mmSDMA0_GFX_PREEMPT; 1566 else 1567 sdma_gfx_preempt = mmSDMA1_GFX_PREEMPT; 1568 1569 /* assert preemption condition */ 1570 amdgpu_ring_set_preempt_cond_exec(ring, false); 1571 1572 /* emit the trailing fence */ 1573 ring->trail_seq += 1; 1574 amdgpu_ring_alloc(ring, 10); 1575 sdma_v5_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, 1576 ring->trail_seq, 0); 1577 amdgpu_ring_commit(ring); 1578 1579 /* assert IB preemption */ 1580 WREG32(sdma_gfx_preempt, 1); 1581 1582 /* poll the trailing fence */ 1583 for (i = 0; i < adev->usec_timeout; i++) { 1584 if (ring->trail_seq == 1585 le32_to_cpu(*(ring->trail_fence_cpu_addr))) 1586 break; 1587 udelay(1); 1588 } 1589 1590 if (i >= adev->usec_timeout) { 1591 r = -EINVAL; 1592 DRM_ERROR("ring %d failed to be preempted\n", ring->idx); 1593 } 1594 1595 /* deassert IB preemption */ 
static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
	int i, r = 0;
	struct amdgpu_device *adev = ring->adev;
	u32 index = 0;
	u64 sdma_gfx_preempt;

	amdgpu_sdma_get_index_from_ring(ring, &index);
	if (index == 0)
		sdma_gfx_preempt = mmSDMA0_GFX_PREEMPT;
	else
		sdma_gfx_preempt = mmSDMA1_GFX_PREEMPT;

	/* assert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, false);

	/* emit the trailing fence */
	ring->trail_seq += 1;
	amdgpu_ring_alloc(ring, 10);
	sdma_v5_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
				  ring->trail_seq, 0);
	amdgpu_ring_commit(ring);

	/* assert IB preemption */
	WREG32(sdma_gfx_preempt, 1);

	/* poll the trailing fence */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->trail_seq ==
		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		r = -EINVAL;
		DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
	}

	/* deassert IB preemption */
	WREG32(sdma_gfx_preempt, 0);

	/* deassert the preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, true);
	return r;
}

static int sdma_v5_0_set_trap_irq_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;

	if (!amdgpu_sriov_vf(adev)) {
		u32 reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ?
			sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) :
			sdma_v5_0_get_reg_offset(adev, 1, mmSDMA0_CNTL);

		sdma_cntl = RREG32(reg_offset);
		sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
					  state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		WREG32(reg_offset, sdma_cntl);
	}

	return 0;
}

static int sdma_v5_0_process_trap_irq(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	uint32_t mes_queue_id = entry->src_data[0];

	DRM_DEBUG("IH: SDMA trap\n");

	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
		struct amdgpu_mes_queue *queue;

		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;

		spin_lock(&adev->mes.queue_id_lock);
		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
		if (queue) {
			DRM_DEBUG("process sdma queue id = %d\n", mes_queue_id);
			amdgpu_fence_process(queue->ring);
		}
		spin_unlock(&adev->mes.queue_id_lock);
		return 0;
	}

	switch (entry->client_id) {
	case SOC15_IH_CLIENTID_SDMA0:
		switch (entry->ring_id) {
		case 0:
			amdgpu_fence_process(&adev->sdma.instance[0].ring);
			break;
		case 1:
			/* XXX compute */
			break;
		case 2:
			/* XXX compute */
			break;
		case 3:
			/* XXX page queue */
			break;
		}
		break;
	case SOC15_IH_CLIENTID_SDMA1:
		switch (entry->ring_id) {
		case 0:
			amdgpu_fence_process(&adev->sdma.instance[1].ring);
			break;
		case 1:
			/* XXX compute */
			break;
		case 2:
			/* XXX compute */
			break;
		case 3:
			/* XXX page queue */
			break;
		}
		break;
	}
	return 0;
}

static int sdma_v5_0_process_illegal_inst_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	return 0;
}
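
/*
 * Note on the MGCG toggles below (a reading of the code, not the register
 * spec): the SOFT_OVERRIDE bits in SDMA0_CLK_CTRL appear to force the
 * individual SDMA clocks on.  Clearing them lets medium grain clock gating
 * take effect, while setting them effectively disables it.
 */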
static void sdma_v5_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
							bool enable)
{
	uint32_t data, def;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
			/* Enable sdma clock gating */
			def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
			data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
				  SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
				  SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
				  SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
				  SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
				  SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
				  SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
				  SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
			if (def != data)
				WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
		} else {
			/* Disable sdma clock gating */
			def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
			data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
				 SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
				 SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
				 SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
				 SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
				 SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
				 SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
				 SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
			if (def != data)
				WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
		}
	}
}

static void sdma_v5_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t data, def;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
			/* Enable sdma mem light sleep */
			def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
			data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
			if (def != data)
				WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);

		} else {
			/* Disable sdma mem light sleep */
			def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
			data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
			if (def != data)
				WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);

		}
	}
}

static int sdma_v5_0_set_clockgating_state(void *handle,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->ip_versions[SDMA0_HWIP][0]) {
	case IP_VERSION(5, 0, 0):
	case IP_VERSION(5, 0, 2):
	case IP_VERSION(5, 0, 5):
		sdma_v5_0_update_medium_grain_clock_gating(adev,
				state == AMD_CG_STATE_GATE);
		sdma_v5_0_update_medium_grain_light_sleep(adev,
				state == AMD_CG_STATE_GATE);
		break;
	default:
		break;
	}

	return 0;
}

static int sdma_v5_0_set_powergating_state(void *handle,
					   enum amd_powergating_state state)
{
	return 0;
}

static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_SDMA_MGCG */
	data = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_CLK_CTRL));
	if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK))
		*flags |= AMD_CG_SUPPORT_SDMA_MGCG;

	/* AMD_CG_SUPPORT_SDMA_LS */
	data = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
	if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
		*flags |= AMD_CG_SUPPORT_SDMA_LS;
}

const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
	.name = "sdma_v5_0",
	.early_init = sdma_v5_0_early_init,
	.late_init = NULL,
	.sw_init = sdma_v5_0_sw_init,
	.sw_fini = sdma_v5_0_sw_fini,
	.hw_init = sdma_v5_0_hw_init,
	.hw_fini = sdma_v5_0_hw_fini,
	.suspend = sdma_v5_0_suspend,
	.resume = sdma_v5_0_resume,
	.is_idle = sdma_v5_0_is_idle,
	.wait_for_idle = sdma_v5_0_wait_for_idle,
	.soft_reset = sdma_v5_0_soft_reset,
	.set_clockgating_state = sdma_v5_0_set_clockgating_state,
	.set_powergating_state = sdma_v5_0_set_powergating_state,
	.get_clockgating_state = sdma_v5_0_get_clockgating_state,
};
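
/*
 * The remaining tables wire the sdma_v5_0_* helpers defined above into the
 * generic amdgpu paths: ring submission, SDMA trap/illegal-instruction IRQs,
 * TTM buffer moves and GPUVM page table updates.
 */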
static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = sdma_v5_0_ring_get_rptr,
	.get_wptr = sdma_v5_0_ring_get_wptr,
	.set_wptr = sdma_v5_0_ring_set_wptr,
	.emit_frame_size =
		5 + /* sdma_v5_0_ring_init_cond_exec */
		6 + /* sdma_v5_0_ring_emit_hdp_flush */
		3 + /* hdp_invalidate */
		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
		/* sdma_v5_0_ring_emit_vm_flush */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */
	.emit_ib = sdma_v5_0_ring_emit_ib,
	.emit_mem_sync = sdma_v5_0_ring_emit_mem_sync,
	.emit_fence = sdma_v5_0_ring_emit_fence,
	.emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,
	.emit_vm_flush = sdma_v5_0_ring_emit_vm_flush,
	.emit_hdp_flush = sdma_v5_0_ring_emit_hdp_flush,
	.test_ring = sdma_v5_0_ring_test_ring,
	.test_ib = sdma_v5_0_ring_test_ib,
	.insert_nop = sdma_v5_0_ring_insert_nop,
	.pad_ib = sdma_v5_0_ring_pad_ib,
	.emit_wreg = sdma_v5_0_ring_emit_wreg,
	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
	.preempt_ib = sdma_v5_0_ring_preempt_ib,
};

static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		adev->sdma.instance[i].ring.funcs = &sdma_v5_0_ring_funcs;
		adev->sdma.instance[i].ring.me = i;
	}
}

static const struct amdgpu_irq_src_funcs sdma_v5_0_trap_irq_funcs = {
	.set = sdma_v5_0_set_trap_irq_state,
	.process = sdma_v5_0_process_trap_irq,
};

static const struct amdgpu_irq_src_funcs sdma_v5_0_illegal_inst_irq_funcs = {
	.process = sdma_v5_0_process_illegal_inst_irq,
};

static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
					adev->sdma.num_instances;
	adev->sdma.trap_irq.funcs = &sdma_v5_0_trap_irq_funcs;
	adev->sdma.illegal_inst_irq.funcs = &sdma_v5_0_illegal_inst_irq_funcs;
}

/**
 * sdma_v5_0_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to copy to
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 * @tmz: if a secure copy should be used
 *
 * Copy GPU buffers using the DMA engine (NAVI10).
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib,
				       uint64_t src_offset,
				       uint64_t dst_offset,
				       uint32_t byte_count,
				       bool tmz)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
		SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
	ib->ptr[ib->length_dw++] = byte_count - 1;
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}
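
/*
 * Packet sizing note (derived from the emit helpers here, not the packet
 * spec): the linear copy packet above is 7 dwords (header, count,
 * parameters, src lo/hi, dst lo/hi) and the constant fill packet below is
 * 5 dwords, matching .copy_num_dw and .fill_num_dw in
 * sdma_v5_0_buffer_funcs further down.
 */
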
/**
 * sdma_v5_0_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine (NAVI10).
 */
static void sdma_v5_0_emit_fill_buffer(struct amdgpu_ib *ib,
				       uint32_t src_data,
				       uint64_t dst_offset,
				       uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = src_data;
	ib->ptr[ib->length_dw++] = byte_count - 1;
}

static const struct amdgpu_buffer_funcs sdma_v5_0_buffer_funcs = {
	.copy_max_bytes = 0x400000,
	.copy_num_dw = 7,
	.emit_copy_buffer = sdma_v5_0_emit_copy_buffer,

	.fill_max_bytes = 0x400000,
	.fill_num_dw = 5,
	.emit_fill_buffer = sdma_v5_0_emit_fill_buffer,
};

static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev)
{
	if (adev->mman.buffer_funcs == NULL) {
		adev->mman.buffer_funcs = &sdma_v5_0_buffer_funcs;
		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
	}
}

static const struct amdgpu_vm_pte_funcs sdma_v5_0_vm_pte_funcs = {
	.copy_pte_num_dw = 7,
	.copy_pte = sdma_v5_0_vm_copy_pte,
	.write_pte = sdma_v5_0_vm_write_pte,
	.set_pte_pde = sdma_v5_0_vm_set_pte_pde,
};

static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{
	unsigned i;

	if (adev->vm_manager.vm_pte_funcs == NULL) {
		adev->vm_manager.vm_pte_funcs = &sdma_v5_0_vm_pte_funcs;
		for (i = 0; i < adev->sdma.num_instances; i++) {
			adev->vm_manager.vm_pte_scheds[i] =
				&adev->sdma.instance[i].ring.sched;
		}
		adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
	}
}

const struct amdgpu_ip_block_version sdma_v5_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 5,
	.minor = 0,
	.rev = 0,
	.funcs = &sdma_v5_0_ip_funcs,
};
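
/*
 * Usage sketch (illustrative only; the actual call lives in the SoC setup
 * code such as nv.c, not in this file): sdma_v5_0_ip_block is consumed by
 * adding it to the device's IP block list during early init, roughly
 *
 *	amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
 *
 * after which the amd_ip_funcs hooks above are driven by the common amdgpu
 * init/fini/suspend/resume machinery.
 */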