1 /* 2 * Copyright 2018 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include "amdgpu.h" 25 #include "amdgpu_discovery.h" 26 #include "soc15_common.h" 27 #include "soc15_hw_ip.h" 28 #include "nbio/nbio_2_3_offset.h" 29 #include "discovery.h" 30 31 #define mmRCC_CONFIG_MEMSIZE 0xde3 32 #define mmMM_INDEX 0x0 33 #define mmMM_INDEX_HI 0x6 34 #define mmMM_DATA 0x1 35 #define HW_ID_MAX 300 36 37 const char *hw_id_names[HW_ID_MAX] = { 38 [MP1_HWID] = "MP1", 39 [MP2_HWID] = "MP2", 40 [THM_HWID] = "THM", 41 [SMUIO_HWID] = "SMUIO", 42 [FUSE_HWID] = "FUSE", 43 [CLKA_HWID] = "CLKA", 44 [PWR_HWID] = "PWR", 45 [GC_HWID] = "GC", 46 [UVD_HWID] = "UVD", 47 [AUDIO_AZ_HWID] = "AUDIO_AZ", 48 [ACP_HWID] = "ACP", 49 [DCI_HWID] = "DCI", 50 [DMU_HWID] = "DMU", 51 [DCO_HWID] = "DCO", 52 [DIO_HWID] = "DIO", 53 [XDMA_HWID] = "XDMA", 54 [DCEAZ_HWID] = "DCEAZ", 55 [DAZ_HWID] = "DAZ", 56 [SDPMUX_HWID] = "SDPMUX", 57 [NTB_HWID] = "NTB", 58 [IOHC_HWID] = "IOHC", 59 [L2IMU_HWID] = "L2IMU", 60 [VCE_HWID] = "VCE", 61 [MMHUB_HWID] = "MMHUB", 62 [ATHUB_HWID] = "ATHUB", 63 [DBGU_NBIO_HWID] = "DBGU_NBIO", 64 [DFX_HWID] = "DFX", 65 [DBGU0_HWID] = "DBGU0", 66 [DBGU1_HWID] = "DBGU1", 67 [OSSSYS_HWID] = "OSSSYS", 68 [HDP_HWID] = "HDP", 69 [SDMA0_HWID] = "SDMA0", 70 [SDMA1_HWID] = "SDMA1", 71 [ISP_HWID] = "ISP", 72 [DBGU_IO_HWID] = "DBGU_IO", 73 [DF_HWID] = "DF", 74 [CLKB_HWID] = "CLKB", 75 [FCH_HWID] = "FCH", 76 [DFX_DAP_HWID] = "DFX_DAP", 77 [L1IMU_PCIE_HWID] = "L1IMU_PCIE", 78 [L1IMU_NBIF_HWID] = "L1IMU_NBIF", 79 [L1IMU_IOAGR_HWID] = "L1IMU_IOAGR", 80 [L1IMU3_HWID] = "L1IMU3", 81 [L1IMU4_HWID] = "L1IMU4", 82 [L1IMU5_HWID] = "L1IMU5", 83 [L1IMU6_HWID] = "L1IMU6", 84 [L1IMU7_HWID] = "L1IMU7", 85 [L1IMU8_HWID] = "L1IMU8", 86 [L1IMU9_HWID] = "L1IMU9", 87 [L1IMU10_HWID] = "L1IMU10", 88 [L1IMU11_HWID] = "L1IMU11", 89 [L1IMU12_HWID] = "L1IMU12", 90 [L1IMU13_HWID] = "L1IMU13", 91 [L1IMU14_HWID] = "L1IMU14", 92 [L1IMU15_HWID] = "L1IMU15", 93 [WAFLC_HWID] = "WAFLC", 94 [FCH_USB_PD_HWID] = "FCH_USB_PD", 95 [PCIE_HWID] = "PCIE", 96 [PCS_HWID] = "PCS", 97 [DDCL_HWID] = "DDCL", 98 [SST_HWID] = "SST", 99 [IOAGR_HWID] = "IOAGR", 100 [NBIF_HWID] = "NBIF", 101 [IOAPIC_HWID] = "IOAPIC", 102 [SYSTEMHUB_HWID] = "SYSTEMHUB", 103 [NTBCCP_HWID] = "NTBCCP", 104 [UMC_HWID] = "UMC", 105 [SATA_HWID] = "SATA", 106 [USB_HWID] = "USB", 107 [CCXSEC_HWID] = "CCXSEC", 108 [XGMI_HWID] = "XGMI", 109 [XGBE_HWID] = "XGBE", 110 [MP0_HWID] = "MP0", 111 }; 112 113 static int hw_id_map[MAX_HWIP] = { 114 [GC_HWIP] = GC_HWID, 115 [HDP_HWIP] = HDP_HWID, 116 [SDMA0_HWIP] = SDMA0_HWID, 117 [SDMA1_HWIP] = SDMA1_HWID, 118 [MMHUB_HWIP] = MMHUB_HWID, 119 [ATHUB_HWIP] = ATHUB_HWID, 120 [NBIO_HWIP] = NBIF_HWID, 121 [MP0_HWIP] = MP0_HWID, 122 [MP1_HWIP] = MP1_HWID, 123 [UVD_HWIP] = UVD_HWID, 124 [VCE_HWIP] = VCE_HWID, 125 [DF_HWIP] = DF_HWID, 126 [DCE_HWIP] = DMU_HWID, 127 [OSSSYS_HWIP] = OSSSYS_HWID, 128 [SMUIO_HWIP] = SMUIO_HWID, 129 [PWR_HWIP] = PWR_HWID, 130 [NBIF_HWIP] = NBIF_HWID, 131 [THM_HWIP] = THM_HWID, 132 [CLK_HWIP] = CLKA_HWID, 133 }; 134 135 static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary) 136 { 137 uint32_t *p = (uint32_t *)binary; 138 uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; 139 uint64_t pos = vram_size - BINARY_MAX_SIZE; 140 unsigned long flags; 141 142 while (pos < vram_size) { 143 spin_lock_irqsave(&adev->mmio_idx_lock, flags); 144 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); 145 WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31); 146 *p++ = RREG32_NO_KIQ(mmMM_DATA); 147 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); 148 pos += 4; 149 } 150 151 return 0; 152 } 153 154 static uint16_t amdgpu_discovery_calculate_checksum(uint8_t *data, uint32_t size) 155 { 156 uint16_t checksum = 0; 157 int i; 158 159 for (i = 0; i < size; i++) 160 checksum += data[i]; 161 162 return checksum; 163 } 164 165 static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size, 166 uint16_t expected) 167 { 168 return !!(amdgpu_discovery_calculate_checksum(data, size) == expected); 169 } 170 171 int amdgpu_discovery_init(struct amdgpu_device *adev) 172 { 173 struct table_info *info; 174 struct binary_header *bhdr; 175 struct ip_discovery_header *ihdr; 176 struct gpu_info_header *ghdr; 177 uint16_t offset; 178 uint16_t size; 179 uint16_t checksum; 180 int r; 181 182 adev->discovery = kzalloc(BINARY_MAX_SIZE, GFP_KERNEL); 183 if (!adev->discovery) 184 return -ENOMEM; 185 186 r = amdgpu_discovery_read_binary(adev, adev->discovery); 187 if (r) { 188 DRM_ERROR("failed to read ip discovery binary\n"); 189 goto out; 190 } 191 192 bhdr = (struct binary_header *)adev->discovery; 193 194 if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) { 195 DRM_ERROR("invalid ip discovery binary signature\n"); 196 r = -EINVAL; 197 goto out; 198 } 199 200 offset = offsetof(struct binary_header, binary_checksum) + 201 sizeof(bhdr->binary_checksum); 202 size = bhdr->binary_size - offset; 203 checksum = bhdr->binary_checksum; 204 205 if (!amdgpu_discovery_verify_checksum(adev->discovery + offset, 206 size, checksum)) { 207 DRM_ERROR("invalid ip discovery binary checksum\n"); 208 r = -EINVAL; 209 goto out; 210 } 211 212 info = &bhdr->table_list[IP_DISCOVERY]; 213 offset = le16_to_cpu(info->offset); 214 checksum = le16_to_cpu(info->checksum); 215 ihdr = (struct ip_discovery_header *)(adev->discovery + offset); 216 217 if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) { 218 DRM_ERROR("invalid ip discovery data table signature\n"); 219 r = -EINVAL; 220 goto out; 221 } 222 223 if (!amdgpu_discovery_verify_checksum(adev->discovery + offset, 224 ihdr->size, checksum)) { 225 DRM_ERROR("invalid ip discovery data table checksum\n"); 226 r = -EINVAL; 227 goto out; 228 } 229 230 info = &bhdr->table_list[GC]; 231 offset = le16_to_cpu(info->offset); 232 checksum = le16_to_cpu(info->checksum); 233 ghdr = (struct gpu_info_header *)(adev->discovery + offset); 234 235 if (!amdgpu_discovery_verify_checksum(adev->discovery + offset, 236 ghdr->size, checksum)) { 237 DRM_ERROR("invalid gc data table checksum\n"); 238 r = -EINVAL; 239 goto out; 240 } 241 242 return 0; 243 244 out: 245 kfree(adev->discovery); 246 adev->discovery = NULL; 247 248 return r; 249 } 250 251 void amdgpu_discovery_fini(struct amdgpu_device *adev) 252 { 253 kfree(adev->discovery); 254 adev->discovery = NULL; 255 } 256 257 int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) 258 { 259 struct binary_header *bhdr; 260 struct ip_discovery_header *ihdr; 261 struct die_header *dhdr; 262 struct ip *ip; 263 uint16_t die_offset; 264 uint16_t ip_offset; 265 uint16_t num_dies; 266 uint16_t num_ips; 267 uint8_t num_base_address; 268 int hw_ip; 269 int i, j, k; 270 271 if (!adev->discovery) { 272 DRM_ERROR("ip discovery uninitialized\n"); 273 return -EINVAL; 274 } 275 276 bhdr = (struct binary_header *)adev->discovery; 277 ihdr = (struct ip_discovery_header *)(adev->discovery + 278 le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); 279 num_dies = le16_to_cpu(ihdr->num_dies); 280 281 DRM_DEBUG("number of dies: %d\n", num_dies); 282 283 for (i = 0; i < num_dies; i++) { 284 die_offset = le16_to_cpu(ihdr->die_info[i].die_offset); 285 dhdr = (struct die_header *)(adev->discovery + die_offset); 286 num_ips = le16_to_cpu(dhdr->num_ips); 287 ip_offset = die_offset + sizeof(*dhdr); 288 289 if (le16_to_cpu(dhdr->die_id) != i) { 290 DRM_ERROR("invalid die id %d, expected %d\n", 291 le16_to_cpu(dhdr->die_id), i); 292 return -EINVAL; 293 } 294 295 DRM_DEBUG("number of hardware IPs on die%d: %d\n", 296 le16_to_cpu(dhdr->die_id), num_ips); 297 298 for (j = 0; j < num_ips; j++) { 299 ip = (struct ip *)(adev->discovery + ip_offset); 300 num_base_address = ip->num_base_address; 301 302 DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n", 303 hw_id_names[le16_to_cpu(ip->hw_id)], 304 le16_to_cpu(ip->hw_id), 305 ip->number_instance, 306 ip->major, ip->minor, 307 ip->revision); 308 309 for (k = 0; k < num_base_address; k++) { 310 /* 311 * convert the endianness of base addresses in place, 312 * so that we don't need to convert them when accessing adev->reg_offset. 313 */ 314 ip->base_address[k] = le32_to_cpu(ip->base_address[k]); 315 DRM_DEBUG("\t0x%08x\n", ip->base_address[k]); 316 } 317 318 for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) { 319 if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) { 320 DRM_INFO("set register base offset for %s\n", 321 hw_id_names[le16_to_cpu(ip->hw_id)]); 322 adev->reg_offset[hw_ip][ip->number_instance] = 323 ip->base_address; 324 } 325 326 } 327 328 ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); 329 } 330 } 331 332 return 0; 333 } 334 335 int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, 336 int *major, int *minor) 337 { 338 struct binary_header *bhdr; 339 struct ip_discovery_header *ihdr; 340 struct die_header *dhdr; 341 struct ip *ip; 342 uint16_t die_offset; 343 uint16_t ip_offset; 344 uint16_t num_dies; 345 uint16_t num_ips; 346 int i, j; 347 348 if (!adev->discovery) { 349 DRM_ERROR("ip discovery uninitialized\n"); 350 return -EINVAL; 351 } 352 353 bhdr = (struct binary_header *)adev->discovery; 354 ihdr = (struct ip_discovery_header *)(adev->discovery + 355 le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); 356 num_dies = le16_to_cpu(ihdr->num_dies); 357 358 for (i = 0; i < num_dies; i++) { 359 die_offset = le16_to_cpu(ihdr->die_info[i].die_offset); 360 dhdr = (struct die_header *)(adev->discovery + die_offset); 361 num_ips = le16_to_cpu(dhdr->num_ips); 362 ip_offset = die_offset + sizeof(*dhdr); 363 364 for (j = 0; j < num_ips; j++) { 365 ip = (struct ip *)(adev->discovery + ip_offset); 366 367 if (le16_to_cpu(ip->hw_id) == hw_id) { 368 if (major) 369 *major = ip->major; 370 if (minor) 371 *minor = ip->minor; 372 return 0; 373 } 374 ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); 375 } 376 } 377 378 return -EINVAL; 379 } 380 381 int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) 382 { 383 struct binary_header *bhdr; 384 struct gc_info_v1_0 *gc_info; 385 386 if (!adev->discovery) { 387 DRM_ERROR("ip discovery uninitialized\n"); 388 return -EINVAL; 389 } 390 391 bhdr = (struct binary_header *)adev->discovery; 392 gc_info = (struct gc_info_v1_0 *)(adev->discovery + 393 le16_to_cpu(bhdr->table_list[GC].offset)); 394 395 adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se); 396 adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) + 397 le32_to_cpu(gc_info->gc_num_wgp1_per_sa)); 398 adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se); 399 adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se); 400 adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c); 401 adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs); 402 adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds); 403 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth); 404 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth); 405 adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer); 406 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size); 407 adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd); 408 adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu); 409 adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size); 410 adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) / 411 le32_to_cpu(gc_info->gc_num_sa_per_se); 412 adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc); 413 414 return 0; 415 } 416