/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "amdgpu.h"
#include "amdgpu_discovery.h"
#include "soc15_hw_ip.h"
#include "discovery.h"

#define mmRCC_CONFIG_MEMSIZE	0xde3
#define mmMM_INDEX		0x0
#define mmMM_INDEX_HI		0x6
#define mmMM_DATA		0x1
#define HW_ID_MAX		300

static const char *hw_id_names[HW_ID_MAX] = {
	[MP1_HWID]		= "MP1",
	[MP2_HWID]		= "MP2",
	[THM_HWID]		= "THM",
	[SMUIO_HWID]		= "SMUIO",
	[FUSE_HWID]		= "FUSE",
	[CLKA_HWID]		= "CLKA",
	[PWR_HWID]		= "PWR",
	[GC_HWID]		= "GC",
	[UVD_HWID]		= "UVD",
	[AUDIO_AZ_HWID]		= "AUDIO_AZ",
	[ACP_HWID]		= "ACP",
	[DCI_HWID]		= "DCI",
	[DMU_HWID]		= "DMU",
	[DCO_HWID]		= "DCO",
	[DIO_HWID]		= "DIO",
	[XDMA_HWID]		= "XDMA",
	[DCEAZ_HWID]		= "DCEAZ",
	[DAZ_HWID]		= "DAZ",
	[SDPMUX_HWID]		= "SDPMUX",
	[NTB_HWID]		= "NTB",
	[IOHC_HWID]		= "IOHC",
	[L2IMU_HWID]		= "L2IMU",
	[VCE_HWID]		= "VCE",
	[MMHUB_HWID]		= "MMHUB",
	[ATHUB_HWID]		= "ATHUB",
	[DBGU_NBIO_HWID]	= "DBGU_NBIO",
	[DFX_HWID]		= "DFX",
	[DBGU0_HWID]		= "DBGU0",
	[DBGU1_HWID]		= "DBGU1",
	[OSSSYS_HWID]		= "OSSSYS",
	[HDP_HWID]		= "HDP",
	[SDMA0_HWID]		= "SDMA0",
	[SDMA1_HWID]		= "SDMA1",
	[ISP_HWID]		= "ISP",
	[DBGU_IO_HWID]		= "DBGU_IO",
	[DF_HWID]		= "DF",
	[CLKB_HWID]		= "CLKB",
	[FCH_HWID]		= "FCH",
	[DFX_DAP_HWID]		= "DFX_DAP",
	[L1IMU_PCIE_HWID]	= "L1IMU_PCIE",
	[L1IMU_NBIF_HWID]	= "L1IMU_NBIF",
	[L1IMU_IOAGR_HWID]	= "L1IMU_IOAGR",
	[L1IMU3_HWID]		= "L1IMU3",
	[L1IMU4_HWID]		= "L1IMU4",
	[L1IMU5_HWID]		= "L1IMU5",
	[L1IMU6_HWID]		= "L1IMU6",
	[L1IMU7_HWID]		= "L1IMU7",
	[L1IMU8_HWID]		= "L1IMU8",
	[L1IMU9_HWID]		= "L1IMU9",
	[L1IMU10_HWID]		= "L1IMU10",
	[L1IMU11_HWID]		= "L1IMU11",
	[L1IMU12_HWID]		= "L1IMU12",
	[L1IMU13_HWID]		= "L1IMU13",
	[L1IMU14_HWID]		= "L1IMU14",
	[L1IMU15_HWID]		= "L1IMU15",
	[WAFLC_HWID]		= "WAFLC",
	[FCH_USB_PD_HWID]	= "FCH_USB_PD",
	[PCIE_HWID]		= "PCIE",
	[PCS_HWID]		= "PCS",
	[DDCL_HWID]		= "DDCL",
	[SST_HWID]		= "SST",
	[IOAGR_HWID]		= "IOAGR",
	[NBIF_HWID]		= "NBIF",
	[IOAPIC_HWID]		= "IOAPIC",
	[SYSTEMHUB_HWID]	= "SYSTEMHUB",
	[NTBCCP_HWID]		= "NTBCCP",
	[UMC_HWID]		= "UMC",
	[SATA_HWID]		= "SATA",
	[USB_HWID]		= "USB",
	[CCXSEC_HWID]		= "CCXSEC",
	[XGMI_HWID]		= "XGMI",
	[XGBE_HWID]		= "XGBE",
	[MP0_HWID]		= "MP0",
};

static int hw_id_map[MAX_HWIP] = {
	[GC_HWIP]	= GC_HWID,
	[HDP_HWIP]	= HDP_HWID,
	[SDMA0_HWIP]	= SDMA0_HWID,
	[SDMA1_HWIP]	= SDMA1_HWID,
	[MMHUB_HWIP]	= MMHUB_HWID,
	[ATHUB_HWIP]	= ATHUB_HWID,
	[NBIO_HWIP]	= NBIF_HWID,
	[MP0_HWIP]	= MP0_HWID,
	[MP1_HWIP]	= MP1_HWID,
	[UVD_HWIP]	= UVD_HWID,
	[VCE_HWIP]	= VCE_HWID,
	[DF_HWIP]	= DF_HWID,
	[DCE_HWIP]	= DMU_HWID,
	[OSSSYS_HWIP]	= OSSSYS_HWID,
	[SMUIO_HWIP]	= SMUIO_HWID,
	[PWR_HWIP]	= PWR_HWID,
	[NBIF_HWIP]	= NBIF_HWID,
	[THM_HWIP]	= THM_HWID,
	[CLK_HWIP]	= CLKA_HWID,
	[UMC_HWIP]	= UMC_HWID,
};
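
/*
 * The discovery binary lives in a reserved region (the discovery TMR) at
 * the top of VRAM: RCC_CONFIG_MEMSIZE reports the visible VRAM size in MB,
 * and the binary starts DISCOVERY_TMR_OFFSET bytes below the end of it.
 */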
static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary)
{
	uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
	uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;

	amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
				  adev->mman.discovery_tmr_size, false);
	return 0;
}

static uint16_t amdgpu_discovery_calculate_checksum(uint8_t *data, uint32_t size)
{
	uint16_t checksum = 0;
	int i;

	for (i = 0; i < size; i++)
		checksum += data[i];

	return checksum;
}

static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size,
						    uint16_t expected)
{
	return !!(amdgpu_discovery_calculate_checksum(data, size) == expected);
}
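
/*
 * Read the discovery binary into a kernel buffer and validate it: the
 * binary signature and checksum first, then the signature and checksum
 * of the IP discovery table, and finally the checksum of the GC table.
 * On any failure the buffer is freed again before returning an error.
 */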
static int amdgpu_discovery_init(struct amdgpu_device *adev)
{
	struct table_info *info;
	struct binary_header *bhdr;
	struct ip_discovery_header *ihdr;
	struct gpu_info_header *ghdr;
	uint16_t offset;
	uint16_t size;
	uint16_t checksum;
	int r;

	adev->mman.discovery_tmr_size = DISCOVERY_TMR_SIZE;
	adev->mman.discovery_bin = kzalloc(adev->mman.discovery_tmr_size, GFP_KERNEL);
	if (!adev->mman.discovery_bin)
		return -ENOMEM;

	r = amdgpu_discovery_read_binary(adev, adev->mman.discovery_bin);
	if (r) {
		DRM_ERROR("failed to read ip discovery binary\n");
		goto out;
	}

	bhdr = (struct binary_header *)adev->mman.discovery_bin;

	if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) {
		DRM_ERROR("invalid ip discovery binary signature\n");
		r = -EINVAL;
		goto out;
	}

	offset = offsetof(struct binary_header, binary_checksum) +
		 sizeof(bhdr->binary_checksum);
	/* the header fields are stored little-endian in the binary */
	size = le16_to_cpu(bhdr->binary_size) - offset;
	checksum = le16_to_cpu(bhdr->binary_checksum);

	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
					      size, checksum)) {
		DRM_ERROR("invalid ip discovery binary checksum\n");
		r = -EINVAL;
		goto out;
	}

	info = &bhdr->table_list[IP_DISCOVERY];
	offset = le16_to_cpu(info->offset);
	checksum = le16_to_cpu(info->checksum);
	ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + offset);

	if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
		DRM_ERROR("invalid ip discovery data table signature\n");
		r = -EINVAL;
		goto out;
	}

	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
					      le16_to_cpu(ihdr->size), checksum)) {
		DRM_ERROR("invalid ip discovery data table checksum\n");
		r = -EINVAL;
		goto out;
	}

	info = &bhdr->table_list[GC];
	offset = le16_to_cpu(info->offset);
	checksum = le16_to_cpu(info->checksum);
	ghdr = (struct gpu_info_header *)(adev->mman.discovery_bin + offset);

	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
					      le32_to_cpu(ghdr->size), checksum)) {
		DRM_ERROR("invalid gc data table checksum\n");
		r = -EINVAL;
		goto out;
	}

	return 0;

out:
	kfree(adev->mman.discovery_bin);
	adev->mman.discovery_bin = NULL;

	return r;
}

void amdgpu_discovery_fini(struct amdgpu_device *adev)
{
	kfree(adev->mman.discovery_bin);
	adev->mman.discovery_bin = NULL;
}

/* reject ip entries whose fields would index out of bounds later */
static int amdgpu_discovery_validate_ip(const struct ip *ip)
{
	if (ip->number_instance >= HWIP_MAX_INSTANCE) {
		DRM_ERROR("Unexpected number_instance (%d) from ip discovery blob\n",
			  ip->number_instance);
		return -EINVAL;
	}
	if (le16_to_cpu(ip->hw_id) >= HW_ID_MAX) {
		DRM_ERROR("Unexpected hw_id (%d) from ip discovery blob\n",
			  le16_to_cpu(ip->hw_id));
		return -EINVAL;
	}

	return 0;
}
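
/*
 * Walk every die and every IP instance described by the discovery table,
 * count the VCN instances, convert each register base address to CPU
 * byte order in place, and wire the bases into adev->reg_offset via the
 * HWIP -> HWID mapping in hw_id_map above.
 */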
int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
{
	struct binary_header *bhdr;
	struct ip_discovery_header *ihdr;
	struct die_header *dhdr;
	struct ip *ip;
	uint16_t die_offset;
	uint16_t ip_offset;
	uint16_t num_dies;
	uint16_t num_ips;
	uint8_t num_base_address;
	int hw_ip;
	int i, j, k;
	int r;

	r = amdgpu_discovery_init(adev);
	if (r) {
		DRM_ERROR("amdgpu_discovery_init failed\n");
		return r;
	}

	bhdr = (struct binary_header *)adev->mman.discovery_bin;
	ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
			le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
	num_dies = le16_to_cpu(ihdr->num_dies);

	DRM_DEBUG("number of dies: %d\n", num_dies);

	for (i = 0; i < num_dies; i++) {
		die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
		dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
		num_ips = le16_to_cpu(dhdr->num_ips);
		ip_offset = die_offset + sizeof(*dhdr);

		if (le16_to_cpu(dhdr->die_id) != i) {
			DRM_ERROR("invalid die id %d, expected %d\n",
				  le16_to_cpu(dhdr->die_id), i);
			return -EINVAL;
		}

		DRM_DEBUG("number of hardware IPs on die%d: %d\n",
			  le16_to_cpu(dhdr->die_id), num_ips);

		for (j = 0; j < num_ips; j++) {
			ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);

			if (amdgpu_discovery_validate_ip(ip))
				goto next_ip;

			num_base_address = ip->num_base_address;

			DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
				  hw_id_names[le16_to_cpu(ip->hw_id)],
				  le16_to_cpu(ip->hw_id),
				  ip->number_instance,
				  ip->major, ip->minor,
				  ip->revision);

			if (le16_to_cpu(ip->hw_id) == VCN_HWID)
				adev->vcn.num_vcn_inst++;

			for (k = 0; k < num_base_address; k++) {
				/*
				 * Convert the endianness of base addresses in place,
				 * so that we don't need to convert them when
				 * accessing adev->reg_offset.
				 */
				ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
				DRM_DEBUG("\t0x%08x\n", ip->base_address[k]);
			}

			for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) {
				if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) {
					DRM_DEBUG("set register base offset for %s\n",
						  hw_id_names[le16_to_cpu(ip->hw_id)]);
					adev->reg_offset[hw_ip][ip->number_instance] =
						ip->base_address;
				}
			}

next_ip:
			/* struct ip declares one base address; account for the rest */
			ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
		}
	}

	return 0;
}
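
/*
 * Look up the major/minor/revision numbers of one IP instance by scanning
 * the per-die IP lists; each of the out parameters may be NULL if the
 * caller does not need it. Returns 0 on a match, -EINVAL otherwise, so a
 * caller would typically do something like this (sketch only):
 *
 *	int maj, min, rev;
 *
 *	if (!amdgpu_discovery_get_ip_version(adev, SDMA0_HWID, 0,
 *					     &maj, &min, &rev))
 *		DRM_DEBUG("SDMA0 is v%d.%d.%d\n", maj, min, rev);
 */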
int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance,
				    int *major, int *minor, int *revision)
{
	struct binary_header *bhdr;
	struct ip_discovery_header *ihdr;
	struct die_header *dhdr;
	struct ip *ip;
	uint16_t die_offset;
	uint16_t ip_offset;
	uint16_t num_dies;
	uint16_t num_ips;
	int i, j;

	if (!adev->mman.discovery_bin) {
		DRM_ERROR("ip discovery uninitialized\n");
		return -EINVAL;
	}

	bhdr = (struct binary_header *)adev->mman.discovery_bin;
	ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
			le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
	num_dies = le16_to_cpu(ihdr->num_dies);

	for (i = 0; i < num_dies; i++) {
		die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
		dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
		num_ips = le16_to_cpu(dhdr->num_ips);
		ip_offset = die_offset + sizeof(*dhdr);

		for (j = 0; j < num_ips; j++) {
			ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);

			if ((le16_to_cpu(ip->hw_id) == hw_id) &&
			    (ip->number_instance == number_instance)) {
				if (major)
					*major = ip->major;
				if (minor)
					*minor = ip->minor;
				if (revision)
					*revision = ip->revision;
				return 0;
			}
			ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
		}
	}

	return -EINVAL;
}

int amdgpu_discovery_get_vcn_version(struct amdgpu_device *adev, int vcn_instance,
				     int *major, int *minor, int *revision)
{
	return amdgpu_discovery_get_ip_version(adev, VCN_HWID,
					       vcn_instance, major, minor, revision);
}

void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
{
	struct binary_header *bhdr;
	struct harvest_table *harvest_info;
	int i, vcn_harvest_count = 0;

	bhdr = (struct binary_header *)adev->mman.discovery_bin;
	harvest_info = (struct harvest_table *)(adev->mman.discovery_bin +
			le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset));

	/* the harvest table holds up to 32 entries; a hw_id of 0 ends the list */
	for (i = 0; i < 32; i++) {
		if (le32_to_cpu(harvest_info->list[i].hw_id) == 0)
			break;

		switch (le32_to_cpu(harvest_info->list[i].hw_id)) {
		case VCN_HWID:
			vcn_harvest_count++;
			break;
		case DMU_HWID:
			adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
			break;
		default:
			break;
		}
	}
	/* JPEG sits inside the VCN block, so it goes away with the last VCN instance */
	if (vcn_harvest_count == adev->vcn.num_vcn_inst) {
		adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
		adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
	}
}

int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
{
	struct binary_header *bhdr;
	struct gc_info_v1_0 *gc_info;

	if (!adev->mman.discovery_bin) {
		DRM_ERROR("ip discovery uninitialized\n");
		return -EINVAL;
	}

	bhdr = (struct binary_header *)adev->mman.discovery_bin;
	gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin +
			le16_to_cpu(bhdr->table_list[GC].offset));

	adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se);
	/* each WGP is two CUs */
	adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) +
					      le32_to_cpu(gc_info->gc_num_wgp1_per_sa));
	adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se);
	adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se);
	adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c);
	adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs);
	adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds);
	adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth);
	adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth);
	adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer);
	adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size);
	adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd);
	adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu);
	adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size);
	/* gc_num_sc_per_se is a per-SE count; divide by the SAs per SE */
	adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) /
					 le32_to_cpu(gc_info->gc_num_sa_per_se);
	adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc);

	return 0;
}