1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #include <linux/types.h> 24 #include <linux/kernel.h> 25 #include <linux/pci.h> 26 #include <linux/errno.h> 27 #include <linux/acpi.h> 28 #include <linux/hash.h> 29 #include <linux/cpufreq.h> 30 #include <linux/log2.h> 31 32 #include "kfd_priv.h" 33 #include "kfd_crat.h" 34 #include "kfd_topology.h" 35 36 static struct list_head topology_device_list; 37 static int topology_crat_parsed; 38 static struct kfd_system_properties sys_props; 39 40 static DECLARE_RWSEM(topology_lock); 41 42 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) 43 { 44 struct kfd_topology_device *top_dev; 45 struct kfd_dev *device = NULL; 46 47 down_read(&topology_lock); 48 49 list_for_each_entry(top_dev, &topology_device_list, list) 50 if (top_dev->gpu_id == gpu_id) { 51 device = top_dev->gpu; 52 break; 53 } 54 55 up_read(&topology_lock); 56 57 return device; 58 } 59 60 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) 61 { 62 struct kfd_topology_device *top_dev; 63 struct kfd_dev *device = NULL; 64 65 down_read(&topology_lock); 66 67 list_for_each_entry(top_dev, &topology_device_list, list) 68 if (top_dev->gpu->pdev == pdev) { 69 device = top_dev->gpu; 70 break; 71 } 72 73 up_read(&topology_lock); 74 75 return device; 76 } 77 78 static int kfd_topology_get_crat_acpi(void *crat_image, size_t *size) 79 { 80 struct acpi_table_header *crat_table; 81 acpi_status status; 82 83 if (!size) 84 return -EINVAL; 85 86 /* 87 * Fetch the CRAT table from ACPI 88 */ 89 status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table); 90 if (status == AE_NOT_FOUND) { 91 pr_warn("CRAT table not found\n"); 92 return -ENODATA; 93 } else if (ACPI_FAILURE(status)) { 94 const char *err = acpi_format_exception(status); 95 96 pr_err("CRAT table error: %s\n", err); 97 return -EINVAL; 98 } 99 100 if (*size >= crat_table->length && crat_image != NULL) 101 memcpy(crat_image, crat_table, crat_table->length); 102 103 *size = crat_table->length; 104 105 return 0; 106 } 107 108 static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, 109 struct crat_subtype_computeunit *cu) 110 { 111 dev->node_props.cpu_cores_count = cu->num_cpu_cores; 112 dev->node_props.cpu_core_id_base = cu->processor_id_low; 113 if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) 114 dev->node_props.capability |= HSA_CAP_ATS_PRESENT; 115 116 pr_info("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, 117 cu->processor_id_low); 118 } 119 120 static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, 121 struct crat_subtype_computeunit *cu) 122 { 123 dev->node_props.simd_id_base = cu->processor_id_low; 124 dev->node_props.simd_count = cu->num_simd_cores; 125 dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; 126 dev->node_props.max_waves_per_simd = cu->max_waves_simd; 127 dev->node_props.wave_front_size = cu->wave_front_size; 128 dev->node_props.mem_banks_count = cu->num_banks; 129 dev->node_props.array_count = cu->num_arrays; 130 dev->node_props.cu_per_simd_array = cu->num_cu_per_array; 131 dev->node_props.simd_per_cu = cu->num_simd_per_cu; 132 dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; 133 if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) 134 dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; 135 pr_info("CU GPU: simds=%d id_base=%d\n", cu->num_simd_cores, 136 cu->processor_id_low); 137 } 138 139 /* kfd_parse_subtype_cu is called when the topology mutex is already acquired */ 140 static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu) 141 { 142 struct kfd_topology_device *dev; 143 int i = 0; 144 145 pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", 146 cu->proximity_domain, cu->hsa_capability); 147 list_for_each_entry(dev, &topology_device_list, list) { 148 if (cu->proximity_domain == i) { 149 if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) 150 kfd_populated_cu_info_cpu(dev, cu); 151 152 if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT) 153 kfd_populated_cu_info_gpu(dev, cu); 154 break; 155 } 156 i++; 157 } 158 159 return 0; 160 } 161 162 /* 163 * kfd_parse_subtype_mem is called when the topology mutex is 164 * already acquired 165 */ 166 static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem) 167 { 168 struct kfd_mem_properties *props; 169 struct kfd_topology_device *dev; 170 int i = 0; 171 172 pr_info("Found memory entry in CRAT table with proximity_domain=%d\n", 173 mem->promixity_domain); 174 list_for_each_entry(dev, &topology_device_list, list) { 175 if (mem->promixity_domain == i) { 176 props = kfd_alloc_struct(props); 177 if (props == NULL) 178 return -ENOMEM; 179 180 if (dev->node_props.cpu_cores_count == 0) 181 props->heap_type = HSA_MEM_HEAP_TYPE_FB_PRIVATE; 182 else 183 props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; 184 185 if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) 186 props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE; 187 if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) 188 props->flags |= HSA_MEM_FLAGS_NON_VOLATILE; 189 190 props->size_in_bytes = 191 ((uint64_t)mem->length_high << 32) + 192 mem->length_low; 193 props->width = mem->width; 194 195 dev->mem_bank_count++; 196 list_add_tail(&props->list, &dev->mem_props); 197 198 break; 199 } 200 i++; 201 } 202 203 return 0; 204 } 205 206 /* 207 * kfd_parse_subtype_cache is called when the topology mutex 208 * is already acquired 209 */ 210 static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache) 211 { 212 struct kfd_cache_properties *props; 213 struct kfd_topology_device *dev; 214 uint32_t id; 215 216 id = cache->processor_id_low; 217 218 pr_info("Found cache entry in CRAT table with processor_id=%d\n", id); 219 list_for_each_entry(dev, &topology_device_list, list) 220 if (id == dev->node_props.cpu_core_id_base || 221 id == dev->node_props.simd_id_base) { 222 props = kfd_alloc_struct(props); 223 if (props == NULL) 224 return -ENOMEM; 225 226 props->processor_id_low = id; 227 props->cache_level = cache->cache_level; 228 props->cache_size = cache->cache_size; 229 props->cacheline_size = cache->cache_line_size; 230 props->cachelines_per_tag = cache->lines_per_tag; 231 props->cache_assoc = cache->associativity; 232 props->cache_latency = cache->cache_latency; 233 234 if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) 235 props->cache_type |= HSA_CACHE_TYPE_DATA; 236 if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) 237 props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; 238 if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE) 239 props->cache_type |= HSA_CACHE_TYPE_CPU; 240 if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) 241 props->cache_type |= HSA_CACHE_TYPE_HSACU; 242 243 dev->cache_count++; 244 dev->node_props.caches_count++; 245 list_add_tail(&props->list, &dev->cache_props); 246 247 break; 248 } 249 250 return 0; 251 } 252 253 /* 254 * kfd_parse_subtype_iolink is called when the topology mutex 255 * is already acquired 256 */ 257 static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink) 258 { 259 struct kfd_iolink_properties *props; 260 struct kfd_topology_device *dev; 261 uint32_t i = 0; 262 uint32_t id_from; 263 uint32_t id_to; 264 265 id_from = iolink->proximity_domain_from; 266 id_to = iolink->proximity_domain_to; 267 268 pr_info("Found IO link entry in CRAT table with id_from=%d\n", id_from); 269 list_for_each_entry(dev, &topology_device_list, list) { 270 if (id_from == i) { 271 props = kfd_alloc_struct(props); 272 if (props == NULL) 273 return -ENOMEM; 274 275 props->node_from = id_from; 276 props->node_to = id_to; 277 props->ver_maj = iolink->version_major; 278 props->ver_min = iolink->version_minor; 279 280 /* 281 * weight factor (derived from CDIR), currently always 1 282 */ 283 props->weight = 1; 284 285 props->min_latency = iolink->minimum_latency; 286 props->max_latency = iolink->maximum_latency; 287 props->min_bandwidth = iolink->minimum_bandwidth_mbs; 288 props->max_bandwidth = iolink->maximum_bandwidth_mbs; 289 props->rec_transfer_size = 290 iolink->recommended_transfer_size; 291 292 dev->io_link_count++; 293 dev->node_props.io_links_count++; 294 list_add_tail(&props->list, &dev->io_link_props); 295 296 break; 297 } 298 i++; 299 } 300 301 return 0; 302 } 303 304 static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr) 305 { 306 struct crat_subtype_computeunit *cu; 307 struct crat_subtype_memory *mem; 308 struct crat_subtype_cache *cache; 309 struct crat_subtype_iolink *iolink; 310 int ret = 0; 311 312 switch (sub_type_hdr->type) { 313 case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY: 314 cu = (struct crat_subtype_computeunit *)sub_type_hdr; 315 ret = kfd_parse_subtype_cu(cu); 316 break; 317 case CRAT_SUBTYPE_MEMORY_AFFINITY: 318 mem = (struct crat_subtype_memory *)sub_type_hdr; 319 ret = kfd_parse_subtype_mem(mem); 320 break; 321 case CRAT_SUBTYPE_CACHE_AFFINITY: 322 cache = (struct crat_subtype_cache *)sub_type_hdr; 323 ret = kfd_parse_subtype_cache(cache); 324 break; 325 case CRAT_SUBTYPE_TLB_AFFINITY: 326 /* 327 * For now, nothing to do here 328 */ 329 pr_info("Found TLB entry in CRAT table (not processing)\n"); 330 break; 331 case CRAT_SUBTYPE_CCOMPUTE_AFFINITY: 332 /* 333 * For now, nothing to do here 334 */ 335 pr_info("Found CCOMPUTE entry in CRAT table (not processing)\n"); 336 break; 337 case CRAT_SUBTYPE_IOLINK_AFFINITY: 338 iolink = (struct crat_subtype_iolink *)sub_type_hdr; 339 ret = kfd_parse_subtype_iolink(iolink); 340 break; 341 default: 342 pr_warn("Unknown subtype (%d) in CRAT\n", 343 sub_type_hdr->type); 344 } 345 346 return ret; 347 } 348 349 static void kfd_release_topology_device(struct kfd_topology_device *dev) 350 { 351 struct kfd_mem_properties *mem; 352 struct kfd_cache_properties *cache; 353 struct kfd_iolink_properties *iolink; 354 355 list_del(&dev->list); 356 357 while (dev->mem_props.next != &dev->mem_props) { 358 mem = container_of(dev->mem_props.next, 359 struct kfd_mem_properties, list); 360 list_del(&mem->list); 361 kfree(mem); 362 } 363 364 while (dev->cache_props.next != &dev->cache_props) { 365 cache = container_of(dev->cache_props.next, 366 struct kfd_cache_properties, list); 367 list_del(&cache->list); 368 kfree(cache); 369 } 370 371 while (dev->io_link_props.next != &dev->io_link_props) { 372 iolink = container_of(dev->io_link_props.next, 373 struct kfd_iolink_properties, list); 374 list_del(&iolink->list); 375 kfree(iolink); 376 } 377 378 kfree(dev); 379 380 sys_props.num_devices--; 381 } 382 383 static void kfd_release_live_view(void) 384 { 385 struct kfd_topology_device *dev; 386 387 while (topology_device_list.next != &topology_device_list) { 388 dev = container_of(topology_device_list.next, 389 struct kfd_topology_device, list); 390 kfd_release_topology_device(dev); 391 } 392 393 memset(&sys_props, 0, sizeof(sys_props)); 394 } 395 396 static struct kfd_topology_device *kfd_create_topology_device(void) 397 { 398 struct kfd_topology_device *dev; 399 400 dev = kfd_alloc_struct(dev); 401 if (!dev) { 402 pr_err("No memory to allocate a topology device"); 403 return NULL; 404 } 405 406 INIT_LIST_HEAD(&dev->mem_props); 407 INIT_LIST_HEAD(&dev->cache_props); 408 INIT_LIST_HEAD(&dev->io_link_props); 409 410 list_add_tail(&dev->list, &topology_device_list); 411 sys_props.num_devices++; 412 413 return dev; 414 } 415 416 static int kfd_parse_crat_table(void *crat_image) 417 { 418 struct kfd_topology_device *top_dev; 419 struct crat_subtype_generic *sub_type_hdr; 420 uint16_t node_id; 421 int ret; 422 struct crat_header *crat_table = (struct crat_header *)crat_image; 423 uint16_t num_nodes; 424 uint32_t image_len; 425 426 if (!crat_image) 427 return -EINVAL; 428 429 num_nodes = crat_table->num_domains; 430 image_len = crat_table->length; 431 432 pr_info("Parsing CRAT table with %d nodes\n", num_nodes); 433 434 for (node_id = 0; node_id < num_nodes; node_id++) { 435 top_dev = kfd_create_topology_device(); 436 if (!top_dev) { 437 kfd_release_live_view(); 438 return -ENOMEM; 439 } 440 } 441 442 sys_props.platform_id = 443 (*((uint64_t *)crat_table->oem_id)) & CRAT_OEMID_64BIT_MASK; 444 sys_props.platform_oem = *((uint64_t *)crat_table->oem_table_id); 445 sys_props.platform_rev = crat_table->revision; 446 447 sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); 448 while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) < 449 ((char *)crat_image) + image_len) { 450 if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) { 451 ret = kfd_parse_subtype(sub_type_hdr); 452 if (ret != 0) { 453 kfd_release_live_view(); 454 return ret; 455 } 456 } 457 458 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 459 sub_type_hdr->length); 460 } 461 462 sys_props.generation_count++; 463 topology_crat_parsed = 1; 464 465 return 0; 466 } 467 468 469 #define sysfs_show_gen_prop(buffer, fmt, ...) \ 470 snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__) 471 #define sysfs_show_32bit_prop(buffer, name, value) \ 472 sysfs_show_gen_prop(buffer, "%s %u\n", name, value) 473 #define sysfs_show_64bit_prop(buffer, name, value) \ 474 sysfs_show_gen_prop(buffer, "%s %llu\n", name, value) 475 #define sysfs_show_32bit_val(buffer, value) \ 476 sysfs_show_gen_prop(buffer, "%u\n", value) 477 #define sysfs_show_str_val(buffer, value) \ 478 sysfs_show_gen_prop(buffer, "%s\n", value) 479 480 static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr, 481 char *buffer) 482 { 483 ssize_t ret; 484 485 /* Making sure that the buffer is an empty string */ 486 buffer[0] = 0; 487 488 if (attr == &sys_props.attr_genid) { 489 ret = sysfs_show_32bit_val(buffer, sys_props.generation_count); 490 } else if (attr == &sys_props.attr_props) { 491 sysfs_show_64bit_prop(buffer, "platform_oem", 492 sys_props.platform_oem); 493 sysfs_show_64bit_prop(buffer, "platform_id", 494 sys_props.platform_id); 495 ret = sysfs_show_64bit_prop(buffer, "platform_rev", 496 sys_props.platform_rev); 497 } else { 498 ret = -EINVAL; 499 } 500 501 return ret; 502 } 503 504 static const struct sysfs_ops sysprops_ops = { 505 .show = sysprops_show, 506 }; 507 508 static struct kobj_type sysprops_type = { 509 .sysfs_ops = &sysprops_ops, 510 }; 511 512 static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr, 513 char *buffer) 514 { 515 ssize_t ret; 516 struct kfd_iolink_properties *iolink; 517 518 /* Making sure that the buffer is an empty string */ 519 buffer[0] = 0; 520 521 iolink = container_of(attr, struct kfd_iolink_properties, attr); 522 sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type); 523 sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj); 524 sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min); 525 sysfs_show_32bit_prop(buffer, "node_from", iolink->node_from); 526 sysfs_show_32bit_prop(buffer, "node_to", iolink->node_to); 527 sysfs_show_32bit_prop(buffer, "weight", iolink->weight); 528 sysfs_show_32bit_prop(buffer, "min_latency", iolink->min_latency); 529 sysfs_show_32bit_prop(buffer, "max_latency", iolink->max_latency); 530 sysfs_show_32bit_prop(buffer, "min_bandwidth", iolink->min_bandwidth); 531 sysfs_show_32bit_prop(buffer, "max_bandwidth", iolink->max_bandwidth); 532 sysfs_show_32bit_prop(buffer, "recommended_transfer_size", 533 iolink->rec_transfer_size); 534 ret = sysfs_show_32bit_prop(buffer, "flags", iolink->flags); 535 536 return ret; 537 } 538 539 static const struct sysfs_ops iolink_ops = { 540 .show = iolink_show, 541 }; 542 543 static struct kobj_type iolink_type = { 544 .sysfs_ops = &iolink_ops, 545 }; 546 547 static ssize_t mem_show(struct kobject *kobj, struct attribute *attr, 548 char *buffer) 549 { 550 ssize_t ret; 551 struct kfd_mem_properties *mem; 552 553 /* Making sure that the buffer is an empty string */ 554 buffer[0] = 0; 555 556 mem = container_of(attr, struct kfd_mem_properties, attr); 557 sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type); 558 sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes); 559 sysfs_show_32bit_prop(buffer, "flags", mem->flags); 560 sysfs_show_32bit_prop(buffer, "width", mem->width); 561 ret = sysfs_show_32bit_prop(buffer, "mem_clk_max", mem->mem_clk_max); 562 563 return ret; 564 } 565 566 static const struct sysfs_ops mem_ops = { 567 .show = mem_show, 568 }; 569 570 static struct kobj_type mem_type = { 571 .sysfs_ops = &mem_ops, 572 }; 573 574 static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, 575 char *buffer) 576 { 577 ssize_t ret; 578 uint32_t i; 579 struct kfd_cache_properties *cache; 580 581 /* Making sure that the buffer is an empty string */ 582 buffer[0] = 0; 583 584 cache = container_of(attr, struct kfd_cache_properties, attr); 585 sysfs_show_32bit_prop(buffer, "processor_id_low", 586 cache->processor_id_low); 587 sysfs_show_32bit_prop(buffer, "level", cache->cache_level); 588 sysfs_show_32bit_prop(buffer, "size", cache->cache_size); 589 sysfs_show_32bit_prop(buffer, "cache_line_size", cache->cacheline_size); 590 sysfs_show_32bit_prop(buffer, "cache_lines_per_tag", 591 cache->cachelines_per_tag); 592 sysfs_show_32bit_prop(buffer, "association", cache->cache_assoc); 593 sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency); 594 sysfs_show_32bit_prop(buffer, "type", cache->cache_type); 595 snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer); 596 for (i = 0; i < KFD_TOPOLOGY_CPU_SIBLINGS; i++) 597 ret = snprintf(buffer, PAGE_SIZE, "%s%d%s", 598 buffer, cache->sibling_map[i], 599 (i == KFD_TOPOLOGY_CPU_SIBLINGS-1) ? 600 "\n" : ","); 601 602 return ret; 603 } 604 605 static const struct sysfs_ops cache_ops = { 606 .show = kfd_cache_show, 607 }; 608 609 static struct kobj_type cache_type = { 610 .sysfs_ops = &cache_ops, 611 }; 612 613 static ssize_t node_show(struct kobject *kobj, struct attribute *attr, 614 char *buffer) 615 { 616 struct kfd_topology_device *dev; 617 char public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; 618 uint32_t i; 619 uint32_t log_max_watch_addr; 620 621 /* Making sure that the buffer is an empty string */ 622 buffer[0] = 0; 623 624 if (strcmp(attr->name, "gpu_id") == 0) { 625 dev = container_of(attr, struct kfd_topology_device, 626 attr_gpuid); 627 return sysfs_show_32bit_val(buffer, dev->gpu_id); 628 } 629 630 if (strcmp(attr->name, "name") == 0) { 631 dev = container_of(attr, struct kfd_topology_device, 632 attr_name); 633 for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) { 634 public_name[i] = 635 (char)dev->node_props.marketing_name[i]; 636 if (dev->node_props.marketing_name[i] == 0) 637 break; 638 } 639 public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0; 640 return sysfs_show_str_val(buffer, public_name); 641 } 642 643 dev = container_of(attr, struct kfd_topology_device, 644 attr_props); 645 sysfs_show_32bit_prop(buffer, "cpu_cores_count", 646 dev->node_props.cpu_cores_count); 647 sysfs_show_32bit_prop(buffer, "simd_count", 648 dev->node_props.simd_count); 649 650 if (dev->mem_bank_count < dev->node_props.mem_banks_count) { 651 pr_info_once("mem_banks_count truncated from %d to %d\n", 652 dev->node_props.mem_banks_count, 653 dev->mem_bank_count); 654 sysfs_show_32bit_prop(buffer, "mem_banks_count", 655 dev->mem_bank_count); 656 } else { 657 sysfs_show_32bit_prop(buffer, "mem_banks_count", 658 dev->node_props.mem_banks_count); 659 } 660 661 sysfs_show_32bit_prop(buffer, "caches_count", 662 dev->node_props.caches_count); 663 sysfs_show_32bit_prop(buffer, "io_links_count", 664 dev->node_props.io_links_count); 665 sysfs_show_32bit_prop(buffer, "cpu_core_id_base", 666 dev->node_props.cpu_core_id_base); 667 sysfs_show_32bit_prop(buffer, "simd_id_base", 668 dev->node_props.simd_id_base); 669 sysfs_show_32bit_prop(buffer, "max_waves_per_simd", 670 dev->node_props.max_waves_per_simd); 671 sysfs_show_32bit_prop(buffer, "lds_size_in_kb", 672 dev->node_props.lds_size_in_kb); 673 sysfs_show_32bit_prop(buffer, "gds_size_in_kb", 674 dev->node_props.gds_size_in_kb); 675 sysfs_show_32bit_prop(buffer, "wave_front_size", 676 dev->node_props.wave_front_size); 677 sysfs_show_32bit_prop(buffer, "array_count", 678 dev->node_props.array_count); 679 sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine", 680 dev->node_props.simd_arrays_per_engine); 681 sysfs_show_32bit_prop(buffer, "cu_per_simd_array", 682 dev->node_props.cu_per_simd_array); 683 sysfs_show_32bit_prop(buffer, "simd_per_cu", 684 dev->node_props.simd_per_cu); 685 sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu", 686 dev->node_props.max_slots_scratch_cu); 687 sysfs_show_32bit_prop(buffer, "vendor_id", 688 dev->node_props.vendor_id); 689 sysfs_show_32bit_prop(buffer, "device_id", 690 dev->node_props.device_id); 691 sysfs_show_32bit_prop(buffer, "location_id", 692 dev->node_props.location_id); 693 694 if (dev->gpu) { 695 log_max_watch_addr = 696 __ilog2_u32(dev->gpu->device_info->num_of_watch_points); 697 698 if (log_max_watch_addr) { 699 dev->node_props.capability |= 700 HSA_CAP_WATCH_POINTS_SUPPORTED; 701 702 dev->node_props.capability |= 703 ((log_max_watch_addr << 704 HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) & 705 HSA_CAP_WATCH_POINTS_TOTALBITS_MASK); 706 } 707 708 sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute", 709 dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz( 710 dev->gpu->kgd)); 711 712 sysfs_show_64bit_prop(buffer, "local_mem_size", 713 (unsigned long long int) 0); 714 715 sysfs_show_32bit_prop(buffer, "fw_version", 716 dev->gpu->kfd2kgd->get_fw_version( 717 dev->gpu->kgd, 718 KGD_ENGINE_MEC1)); 719 sysfs_show_32bit_prop(buffer, "capability", 720 dev->node_props.capability); 721 } 722 723 return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute", 724 cpufreq_quick_get_max(0)/1000); 725 } 726 727 static const struct sysfs_ops node_ops = { 728 .show = node_show, 729 }; 730 731 static struct kobj_type node_type = { 732 .sysfs_ops = &node_ops, 733 }; 734 735 static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr) 736 { 737 sysfs_remove_file(kobj, attr); 738 kobject_del(kobj); 739 kobject_put(kobj); 740 } 741 742 static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) 743 { 744 struct kfd_iolink_properties *iolink; 745 struct kfd_cache_properties *cache; 746 struct kfd_mem_properties *mem; 747 748 if (dev->kobj_iolink) { 749 list_for_each_entry(iolink, &dev->io_link_props, list) 750 if (iolink->kobj) { 751 kfd_remove_sysfs_file(iolink->kobj, 752 &iolink->attr); 753 iolink->kobj = NULL; 754 } 755 kobject_del(dev->kobj_iolink); 756 kobject_put(dev->kobj_iolink); 757 dev->kobj_iolink = NULL; 758 } 759 760 if (dev->kobj_cache) { 761 list_for_each_entry(cache, &dev->cache_props, list) 762 if (cache->kobj) { 763 kfd_remove_sysfs_file(cache->kobj, 764 &cache->attr); 765 cache->kobj = NULL; 766 } 767 kobject_del(dev->kobj_cache); 768 kobject_put(dev->kobj_cache); 769 dev->kobj_cache = NULL; 770 } 771 772 if (dev->kobj_mem) { 773 list_for_each_entry(mem, &dev->mem_props, list) 774 if (mem->kobj) { 775 kfd_remove_sysfs_file(mem->kobj, &mem->attr); 776 mem->kobj = NULL; 777 } 778 kobject_del(dev->kobj_mem); 779 kobject_put(dev->kobj_mem); 780 dev->kobj_mem = NULL; 781 } 782 783 if (dev->kobj_node) { 784 sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid); 785 sysfs_remove_file(dev->kobj_node, &dev->attr_name); 786 sysfs_remove_file(dev->kobj_node, &dev->attr_props); 787 kobject_del(dev->kobj_node); 788 kobject_put(dev->kobj_node); 789 dev->kobj_node = NULL; 790 } 791 } 792 793 static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, 794 uint32_t id) 795 { 796 struct kfd_iolink_properties *iolink; 797 struct kfd_cache_properties *cache; 798 struct kfd_mem_properties *mem; 799 int ret; 800 uint32_t i; 801 802 if (WARN_ON(dev->kobj_node)) 803 return -EEXIST; 804 805 /* 806 * Creating the sysfs folders 807 */ 808 dev->kobj_node = kfd_alloc_struct(dev->kobj_node); 809 if (!dev->kobj_node) 810 return -ENOMEM; 811 812 ret = kobject_init_and_add(dev->kobj_node, &node_type, 813 sys_props.kobj_nodes, "%d", id); 814 if (ret < 0) 815 return ret; 816 817 dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node); 818 if (!dev->kobj_mem) 819 return -ENOMEM; 820 821 dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node); 822 if (!dev->kobj_cache) 823 return -ENOMEM; 824 825 dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node); 826 if (!dev->kobj_iolink) 827 return -ENOMEM; 828 829 /* 830 * Creating sysfs files for node properties 831 */ 832 dev->attr_gpuid.name = "gpu_id"; 833 dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE; 834 sysfs_attr_init(&dev->attr_gpuid); 835 dev->attr_name.name = "name"; 836 dev->attr_name.mode = KFD_SYSFS_FILE_MODE; 837 sysfs_attr_init(&dev->attr_name); 838 dev->attr_props.name = "properties"; 839 dev->attr_props.mode = KFD_SYSFS_FILE_MODE; 840 sysfs_attr_init(&dev->attr_props); 841 ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid); 842 if (ret < 0) 843 return ret; 844 ret = sysfs_create_file(dev->kobj_node, &dev->attr_name); 845 if (ret < 0) 846 return ret; 847 ret = sysfs_create_file(dev->kobj_node, &dev->attr_props); 848 if (ret < 0) 849 return ret; 850 851 i = 0; 852 list_for_each_entry(mem, &dev->mem_props, list) { 853 mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); 854 if (!mem->kobj) 855 return -ENOMEM; 856 ret = kobject_init_and_add(mem->kobj, &mem_type, 857 dev->kobj_mem, "%d", i); 858 if (ret < 0) 859 return ret; 860 861 mem->attr.name = "properties"; 862 mem->attr.mode = KFD_SYSFS_FILE_MODE; 863 sysfs_attr_init(&mem->attr); 864 ret = sysfs_create_file(mem->kobj, &mem->attr); 865 if (ret < 0) 866 return ret; 867 i++; 868 } 869 870 i = 0; 871 list_for_each_entry(cache, &dev->cache_props, list) { 872 cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); 873 if (!cache->kobj) 874 return -ENOMEM; 875 ret = kobject_init_and_add(cache->kobj, &cache_type, 876 dev->kobj_cache, "%d", i); 877 if (ret < 0) 878 return ret; 879 880 cache->attr.name = "properties"; 881 cache->attr.mode = KFD_SYSFS_FILE_MODE; 882 sysfs_attr_init(&cache->attr); 883 ret = sysfs_create_file(cache->kobj, &cache->attr); 884 if (ret < 0) 885 return ret; 886 i++; 887 } 888 889 i = 0; 890 list_for_each_entry(iolink, &dev->io_link_props, list) { 891 iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); 892 if (!iolink->kobj) 893 return -ENOMEM; 894 ret = kobject_init_and_add(iolink->kobj, &iolink_type, 895 dev->kobj_iolink, "%d", i); 896 if (ret < 0) 897 return ret; 898 899 iolink->attr.name = "properties"; 900 iolink->attr.mode = KFD_SYSFS_FILE_MODE; 901 sysfs_attr_init(&iolink->attr); 902 ret = sysfs_create_file(iolink->kobj, &iolink->attr); 903 if (ret < 0) 904 return ret; 905 i++; 906 } 907 908 return 0; 909 } 910 911 static int kfd_build_sysfs_node_tree(void) 912 { 913 struct kfd_topology_device *dev; 914 int ret; 915 uint32_t i = 0; 916 917 list_for_each_entry(dev, &topology_device_list, list) { 918 ret = kfd_build_sysfs_node_entry(dev, i); 919 if (ret < 0) 920 return ret; 921 i++; 922 } 923 924 return 0; 925 } 926 927 static void kfd_remove_sysfs_node_tree(void) 928 { 929 struct kfd_topology_device *dev; 930 931 list_for_each_entry(dev, &topology_device_list, list) 932 kfd_remove_sysfs_node_entry(dev); 933 } 934 935 static int kfd_topology_update_sysfs(void) 936 { 937 int ret; 938 939 pr_info("Creating topology SYSFS entries\n"); 940 if (!sys_props.kobj_topology) { 941 sys_props.kobj_topology = 942 kfd_alloc_struct(sys_props.kobj_topology); 943 if (!sys_props.kobj_topology) 944 return -ENOMEM; 945 946 ret = kobject_init_and_add(sys_props.kobj_topology, 947 &sysprops_type, &kfd_device->kobj, 948 "topology"); 949 if (ret < 0) 950 return ret; 951 952 sys_props.kobj_nodes = kobject_create_and_add("nodes", 953 sys_props.kobj_topology); 954 if (!sys_props.kobj_nodes) 955 return -ENOMEM; 956 957 sys_props.attr_genid.name = "generation_id"; 958 sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE; 959 sysfs_attr_init(&sys_props.attr_genid); 960 ret = sysfs_create_file(sys_props.kobj_topology, 961 &sys_props.attr_genid); 962 if (ret < 0) 963 return ret; 964 965 sys_props.attr_props.name = "system_properties"; 966 sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE; 967 sysfs_attr_init(&sys_props.attr_props); 968 ret = sysfs_create_file(sys_props.kobj_topology, 969 &sys_props.attr_props); 970 if (ret < 0) 971 return ret; 972 } 973 974 kfd_remove_sysfs_node_tree(); 975 976 return kfd_build_sysfs_node_tree(); 977 } 978 979 static void kfd_topology_release_sysfs(void) 980 { 981 kfd_remove_sysfs_node_tree(); 982 if (sys_props.kobj_topology) { 983 sysfs_remove_file(sys_props.kobj_topology, 984 &sys_props.attr_genid); 985 sysfs_remove_file(sys_props.kobj_topology, 986 &sys_props.attr_props); 987 if (sys_props.kobj_nodes) { 988 kobject_del(sys_props.kobj_nodes); 989 kobject_put(sys_props.kobj_nodes); 990 sys_props.kobj_nodes = NULL; 991 } 992 kobject_del(sys_props.kobj_topology); 993 kobject_put(sys_props.kobj_topology); 994 sys_props.kobj_topology = NULL; 995 } 996 } 997 998 int kfd_topology_init(void) 999 { 1000 void *crat_image = NULL; 1001 size_t image_size = 0; 1002 int ret; 1003 1004 /* 1005 * Initialize the head for the topology device list 1006 */ 1007 INIT_LIST_HEAD(&topology_device_list); 1008 init_rwsem(&topology_lock); 1009 topology_crat_parsed = 0; 1010 1011 memset(&sys_props, 0, sizeof(sys_props)); 1012 1013 /* 1014 * Get the CRAT image from the ACPI 1015 */ 1016 ret = kfd_topology_get_crat_acpi(crat_image, &image_size); 1017 if (ret == 0 && image_size > 0) { 1018 pr_info("Found CRAT image with size=%zd\n", image_size); 1019 crat_image = kmalloc(image_size, GFP_KERNEL); 1020 if (!crat_image) { 1021 ret = -ENOMEM; 1022 pr_err("No memory for allocating CRAT image\n"); 1023 goto err; 1024 } 1025 ret = kfd_topology_get_crat_acpi(crat_image, &image_size); 1026 1027 if (ret == 0) { 1028 down_write(&topology_lock); 1029 ret = kfd_parse_crat_table(crat_image); 1030 if (ret == 0) 1031 ret = kfd_topology_update_sysfs(); 1032 up_write(&topology_lock); 1033 } else { 1034 pr_err("Couldn't get CRAT table size from ACPI\n"); 1035 } 1036 kfree(crat_image); 1037 } else if (ret == -ENODATA) { 1038 ret = 0; 1039 } else { 1040 pr_err("Couldn't get CRAT table size from ACPI\n"); 1041 } 1042 1043 err: 1044 pr_info("Finished initializing topology ret=%d\n", ret); 1045 return ret; 1046 } 1047 1048 void kfd_topology_shutdown(void) 1049 { 1050 kfd_topology_release_sysfs(); 1051 kfd_release_live_view(); 1052 } 1053 1054 static void kfd_debug_print_topology(void) 1055 { 1056 struct kfd_topology_device *dev; 1057 uint32_t i = 0; 1058 1059 pr_info("DEBUG PRINT OF TOPOLOGY:"); 1060 list_for_each_entry(dev, &topology_device_list, list) { 1061 pr_info("Node: %d\n", i); 1062 pr_info("\tGPU assigned: %s\n", (dev->gpu ? "yes" : "no")); 1063 pr_info("\tCPU count: %d\n", dev->node_props.cpu_cores_count); 1064 pr_info("\tSIMD count: %d", dev->node_props.simd_count); 1065 i++; 1066 } 1067 } 1068 1069 static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) 1070 { 1071 uint32_t hashout; 1072 uint32_t buf[7]; 1073 uint64_t local_mem_size; 1074 int i; 1075 1076 if (!gpu) 1077 return 0; 1078 1079 local_mem_size = gpu->kfd2kgd->get_vmem_size(gpu->kgd); 1080 1081 buf[0] = gpu->pdev->devfn; 1082 buf[1] = gpu->pdev->subsystem_vendor; 1083 buf[2] = gpu->pdev->subsystem_device; 1084 buf[3] = gpu->pdev->device; 1085 buf[4] = gpu->pdev->bus->number; 1086 buf[5] = lower_32_bits(local_mem_size); 1087 buf[6] = upper_32_bits(local_mem_size); 1088 1089 for (i = 0, hashout = 0; i < 7; i++) 1090 hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH); 1091 1092 return hashout; 1093 } 1094 1095 static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) 1096 { 1097 struct kfd_topology_device *dev; 1098 struct kfd_topology_device *out_dev = NULL; 1099 1100 list_for_each_entry(dev, &topology_device_list, list) 1101 if (!dev->gpu && (dev->node_props.simd_count > 0)) { 1102 dev->gpu = gpu; 1103 out_dev = dev; 1104 break; 1105 } 1106 1107 return out_dev; 1108 } 1109 1110 static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival) 1111 { 1112 /* 1113 * TODO: Generate an event for thunk about the arrival/removal 1114 * of the GPU 1115 */ 1116 } 1117 1118 int kfd_topology_add_device(struct kfd_dev *gpu) 1119 { 1120 uint32_t gpu_id; 1121 struct kfd_topology_device *dev; 1122 int res; 1123 1124 gpu_id = kfd_generate_gpu_id(gpu); 1125 1126 pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); 1127 1128 down_write(&topology_lock); 1129 /* 1130 * Try to assign the GPU to existing topology device (generated from 1131 * CRAT table 1132 */ 1133 dev = kfd_assign_gpu(gpu); 1134 if (!dev) { 1135 pr_info("GPU was not found in the current topology. Extending.\n"); 1136 kfd_debug_print_topology(); 1137 dev = kfd_create_topology_device(); 1138 if (!dev) { 1139 res = -ENOMEM; 1140 goto err; 1141 } 1142 dev->gpu = gpu; 1143 1144 /* 1145 * TODO: Make a call to retrieve topology information from the 1146 * GPU vBIOS 1147 */ 1148 1149 /* Update the SYSFS tree, since we added another topology 1150 * device 1151 */ 1152 if (kfd_topology_update_sysfs() < 0) 1153 kfd_topology_release_sysfs(); 1154 1155 } 1156 1157 dev->gpu_id = gpu_id; 1158 gpu->id = gpu_id; 1159 dev->node_props.vendor_id = gpu->pdev->vendor; 1160 dev->node_props.device_id = gpu->pdev->device; 1161 dev->node_props.location_id = (gpu->pdev->bus->number << 24) + 1162 (gpu->pdev->devfn & 0xffffff); 1163 /* 1164 * TODO: Retrieve max engine clock values from KGD 1165 */ 1166 1167 if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { 1168 dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE; 1169 pr_info("Adding doorbell packet type capability\n"); 1170 } 1171 1172 res = 0; 1173 1174 err: 1175 up_write(&topology_lock); 1176 1177 if (res == 0) 1178 kfd_notify_gpu_change(gpu_id, 1); 1179 1180 return res; 1181 } 1182 1183 int kfd_topology_remove_device(struct kfd_dev *gpu) 1184 { 1185 struct kfd_topology_device *dev; 1186 uint32_t gpu_id; 1187 int res = -ENODEV; 1188 1189 down_write(&topology_lock); 1190 1191 list_for_each_entry(dev, &topology_device_list, list) 1192 if (dev->gpu == gpu) { 1193 gpu_id = dev->gpu_id; 1194 kfd_remove_sysfs_node_entry(dev); 1195 kfd_release_topology_device(dev); 1196 res = 0; 1197 if (kfd_topology_update_sysfs() < 0) 1198 kfd_topology_release_sysfs(); 1199 break; 1200 } 1201 1202 up_write(&topology_lock); 1203 1204 if (res == 0) 1205 kfd_notify_gpu_change(gpu_id, 0); 1206 1207 return res; 1208 } 1209 1210 /* 1211 * When idx is out of bounds, the function will return NULL 1212 */ 1213 struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx) 1214 { 1215 1216 struct kfd_topology_device *top_dev; 1217 struct kfd_dev *device = NULL; 1218 uint8_t device_idx = 0; 1219 1220 down_read(&topology_lock); 1221 1222 list_for_each_entry(top_dev, &topology_device_list, list) { 1223 if (device_idx == idx) { 1224 device = top_dev->gpu; 1225 break; 1226 } 1227 1228 device_idx++; 1229 } 1230 1231 up_read(&topology_lock); 1232 1233 return device; 1234 1235 } 1236