1d87f36a0SRajneesh Bhardwaj // SPDX-License-Identifier: GPL-2.0 OR MIT 25b5c4e40SEvgeny Pinchuk /* 3d87f36a0SRajneesh Bhardwaj * Copyright 2014-2022 Advanced Micro Devices, Inc. 45b5c4e40SEvgeny Pinchuk * 55b5c4e40SEvgeny Pinchuk * Permission is hereby granted, free of charge, to any person obtaining a 65b5c4e40SEvgeny Pinchuk * copy of this software and associated documentation files (the "Software"), 75b5c4e40SEvgeny Pinchuk * to deal in the Software without restriction, including without limitation 85b5c4e40SEvgeny Pinchuk * the rights to use, copy, modify, merge, publish, distribute, sublicense, 95b5c4e40SEvgeny Pinchuk * and/or sell copies of the Software, and to permit persons to whom the 105b5c4e40SEvgeny Pinchuk * Software is furnished to do so, subject to the following conditions: 115b5c4e40SEvgeny Pinchuk * 125b5c4e40SEvgeny Pinchuk * The above copyright notice and this permission notice shall be included in 135b5c4e40SEvgeny Pinchuk * all copies or substantial portions of the Software. 145b5c4e40SEvgeny Pinchuk * 155b5c4e40SEvgeny Pinchuk * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 165b5c4e40SEvgeny Pinchuk * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 175b5c4e40SEvgeny Pinchuk * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 185b5c4e40SEvgeny Pinchuk * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 195b5c4e40SEvgeny Pinchuk * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 205b5c4e40SEvgeny Pinchuk * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 215b5c4e40SEvgeny Pinchuk * OTHER DEALINGS IN THE SOFTWARE. 
225b5c4e40SEvgeny Pinchuk */ 235b5c4e40SEvgeny Pinchuk 245b5c4e40SEvgeny Pinchuk #include <linux/types.h> 255b5c4e40SEvgeny Pinchuk #include <linux/kernel.h> 265b5c4e40SEvgeny Pinchuk #include <linux/pci.h> 275b5c4e40SEvgeny Pinchuk #include <linux/errno.h> 285b5c4e40SEvgeny Pinchuk #include <linux/acpi.h> 295b5c4e40SEvgeny Pinchuk #include <linux/hash.h> 305b5c4e40SEvgeny Pinchuk #include <linux/cpufreq.h> 31f7c826adSAlexey Skidanov #include <linux/log2.h> 32520b8fb7SFelix Kuehling #include <linux/dmi.h> 33520b8fb7SFelix Kuehling #include <linux/atomic.h> 345b5c4e40SEvgeny Pinchuk 355b5c4e40SEvgeny Pinchuk #include "kfd_priv.h" 365b5c4e40SEvgeny Pinchuk #include "kfd_crat.h" 375b5c4e40SEvgeny Pinchuk #include "kfd_topology.h" 38851a645eSFelix Kuehling #include "kfd_device_queue_manager.h" 3964d1c3a4SFelix Kuehling #include "kfd_iommu.h" 405a75ea56SFelix Kuehling #include "kfd_svm.h" 415b87245fSAmber Lin #include "amdgpu_amdkfd.h" 420dee45a2SEric Huang #include "amdgpu_ras.h" 430f28cca8SRamesh Errabolu #include "amdgpu.h" 445b5c4e40SEvgeny Pinchuk 454f449311SHarish Kasiviswanathan /* topology_device_list - Master list of all topology devices */ 464f449311SHarish Kasiviswanathan static struct list_head topology_device_list; 47520b8fb7SFelix Kuehling static struct kfd_system_properties sys_props; 485b5c4e40SEvgeny Pinchuk 495b5c4e40SEvgeny Pinchuk static DECLARE_RWSEM(topology_lock); 5046d18d51SMukul Joshi static uint32_t topology_crat_proximity_domain; 515b5c4e40SEvgeny Pinchuk 5246d18d51SMukul Joshi struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock( 533a87177eSHarish Kasiviswanathan uint32_t proximity_domain) 543a87177eSHarish Kasiviswanathan { 553a87177eSHarish Kasiviswanathan struct kfd_topology_device *top_dev; 563a87177eSHarish Kasiviswanathan struct kfd_topology_device *device = NULL; 573a87177eSHarish Kasiviswanathan 583a87177eSHarish Kasiviswanathan list_for_each_entry(top_dev, &topology_device_list, list) 593a87177eSHarish 
Kasiviswanathan if (top_dev->proximity_domain == proximity_domain) { 603a87177eSHarish Kasiviswanathan device = top_dev; 613a87177eSHarish Kasiviswanathan break; 623a87177eSHarish Kasiviswanathan } 633a87177eSHarish Kasiviswanathan 6446d18d51SMukul Joshi return device; 6546d18d51SMukul Joshi } 6646d18d51SMukul Joshi 6746d18d51SMukul Joshi struct kfd_topology_device *kfd_topology_device_by_proximity_domain( 6846d18d51SMukul Joshi uint32_t proximity_domain) 6946d18d51SMukul Joshi { 7046d18d51SMukul Joshi struct kfd_topology_device *device = NULL; 7146d18d51SMukul Joshi 7246d18d51SMukul Joshi down_read(&topology_lock); 7346d18d51SMukul Joshi 7446d18d51SMukul Joshi device = kfd_topology_device_by_proximity_domain_no_lock( 7546d18d51SMukul Joshi proximity_domain); 763a87177eSHarish Kasiviswanathan up_read(&topology_lock); 773a87177eSHarish Kasiviswanathan 783a87177eSHarish Kasiviswanathan return device; 793a87177eSHarish Kasiviswanathan } 803a87177eSHarish Kasiviswanathan 8144d8cc6fSYong Zhao struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id) 825b5c4e40SEvgeny Pinchuk { 8344d8cc6fSYong Zhao struct kfd_topology_device *top_dev = NULL; 8444d8cc6fSYong Zhao struct kfd_topology_device *ret = NULL; 855b5c4e40SEvgeny Pinchuk 865b5c4e40SEvgeny Pinchuk down_read(&topology_lock); 875b5c4e40SEvgeny Pinchuk 885b5c4e40SEvgeny Pinchuk list_for_each_entry(top_dev, &topology_device_list, list) 895b5c4e40SEvgeny Pinchuk if (top_dev->gpu_id == gpu_id) { 9044d8cc6fSYong Zhao ret = top_dev; 915b5c4e40SEvgeny Pinchuk break; 925b5c4e40SEvgeny Pinchuk } 935b5c4e40SEvgeny Pinchuk 945b5c4e40SEvgeny Pinchuk up_read(&topology_lock); 955b5c4e40SEvgeny Pinchuk 9644d8cc6fSYong Zhao return ret; 9744d8cc6fSYong Zhao } 9844d8cc6fSYong Zhao 998dc1db31SMukul Joshi struct kfd_node *kfd_device_by_id(uint32_t gpu_id) 10044d8cc6fSYong Zhao { 10144d8cc6fSYong Zhao struct kfd_topology_device *top_dev; 10244d8cc6fSYong Zhao 10344d8cc6fSYong Zhao top_dev = kfd_topology_device_by_id(gpu_id); 
10444d8cc6fSYong Zhao if (!top_dev) 10544d8cc6fSYong Zhao return NULL; 10644d8cc6fSYong Zhao 10744d8cc6fSYong Zhao return top_dev->gpu; 1085b5c4e40SEvgeny Pinchuk } 1095b5c4e40SEvgeny Pinchuk 1108dc1db31SMukul Joshi struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev) 1115b5c4e40SEvgeny Pinchuk { 1125b5c4e40SEvgeny Pinchuk struct kfd_topology_device *top_dev; 1138dc1db31SMukul Joshi struct kfd_node *device = NULL; 1145b5c4e40SEvgeny Pinchuk 1155b5c4e40SEvgeny Pinchuk down_read(&topology_lock); 1165b5c4e40SEvgeny Pinchuk 1175b5c4e40SEvgeny Pinchuk list_for_each_entry(top_dev, &topology_device_list, list) 118d69a3b76SMukul Joshi if (top_dev->gpu && top_dev->gpu->adev->pdev == pdev) { 1195b5c4e40SEvgeny Pinchuk device = top_dev->gpu; 1205b5c4e40SEvgeny Pinchuk break; 1215b5c4e40SEvgeny Pinchuk } 1225b5c4e40SEvgeny Pinchuk 1235b5c4e40SEvgeny Pinchuk up_read(&topology_lock); 1245b5c4e40SEvgeny Pinchuk 1255b5c4e40SEvgeny Pinchuk return device; 1265b5c4e40SEvgeny Pinchuk } 1275b5c4e40SEvgeny Pinchuk 1288dc1db31SMukul Joshi struct kfd_node *kfd_device_by_adev(const struct amdgpu_device *adev) 1291dde0ea9SFelix Kuehling { 1301dde0ea9SFelix Kuehling struct kfd_topology_device *top_dev; 1318dc1db31SMukul Joshi struct kfd_node *device = NULL; 1321dde0ea9SFelix Kuehling 1331dde0ea9SFelix Kuehling down_read(&topology_lock); 1341dde0ea9SFelix Kuehling 1351dde0ea9SFelix Kuehling list_for_each_entry(top_dev, &topology_device_list, list) 136574c4183SGraham Sider if (top_dev->gpu && top_dev->gpu->adev == adev) { 1371dde0ea9SFelix Kuehling device = top_dev->gpu; 1381dde0ea9SFelix Kuehling break; 1391dde0ea9SFelix Kuehling } 1401dde0ea9SFelix Kuehling 1411dde0ea9SFelix Kuehling up_read(&topology_lock); 1421dde0ea9SFelix Kuehling 1431dde0ea9SFelix Kuehling return device; 1441dde0ea9SFelix Kuehling } 1451dde0ea9SFelix Kuehling 1463a87177eSHarish Kasiviswanathan /* Called with write topology_lock acquired */ 1475b5c4e40SEvgeny Pinchuk static void 
kfd_release_topology_device(struct kfd_topology_device *dev) 1485b5c4e40SEvgeny Pinchuk { 1495b5c4e40SEvgeny Pinchuk struct kfd_mem_properties *mem; 1505b5c4e40SEvgeny Pinchuk struct kfd_cache_properties *cache; 1515b5c4e40SEvgeny Pinchuk struct kfd_iolink_properties *iolink; 1520f28cca8SRamesh Errabolu struct kfd_iolink_properties *p2plink; 153f4757347SAmber Lin struct kfd_perf_properties *perf; 1545b5c4e40SEvgeny Pinchuk 1555b5c4e40SEvgeny Pinchuk list_del(&dev->list); 1565b5c4e40SEvgeny Pinchuk 1575b5c4e40SEvgeny Pinchuk while (dev->mem_props.next != &dev->mem_props) { 1585b5c4e40SEvgeny Pinchuk mem = container_of(dev->mem_props.next, 1595b5c4e40SEvgeny Pinchuk struct kfd_mem_properties, list); 1605b5c4e40SEvgeny Pinchuk list_del(&mem->list); 1615b5c4e40SEvgeny Pinchuk kfree(mem); 1625b5c4e40SEvgeny Pinchuk } 1635b5c4e40SEvgeny Pinchuk 1645b5c4e40SEvgeny Pinchuk while (dev->cache_props.next != &dev->cache_props) { 1655b5c4e40SEvgeny Pinchuk cache = container_of(dev->cache_props.next, 1665b5c4e40SEvgeny Pinchuk struct kfd_cache_properties, list); 1675b5c4e40SEvgeny Pinchuk list_del(&cache->list); 1685b5c4e40SEvgeny Pinchuk kfree(cache); 1695b5c4e40SEvgeny Pinchuk } 1705b5c4e40SEvgeny Pinchuk 1715b5c4e40SEvgeny Pinchuk while (dev->io_link_props.next != &dev->io_link_props) { 1725b5c4e40SEvgeny Pinchuk iolink = container_of(dev->io_link_props.next, 1735b5c4e40SEvgeny Pinchuk struct kfd_iolink_properties, list); 1745b5c4e40SEvgeny Pinchuk list_del(&iolink->list); 1755b5c4e40SEvgeny Pinchuk kfree(iolink); 1765b5c4e40SEvgeny Pinchuk } 1775b5c4e40SEvgeny Pinchuk 1780f28cca8SRamesh Errabolu while (dev->p2p_link_props.next != &dev->p2p_link_props) { 1790f28cca8SRamesh Errabolu p2plink = container_of(dev->p2p_link_props.next, 1800f28cca8SRamesh Errabolu struct kfd_iolink_properties, list); 1810f28cca8SRamesh Errabolu list_del(&p2plink->list); 1820f28cca8SRamesh Errabolu kfree(p2plink); 1830f28cca8SRamesh Errabolu } 1840f28cca8SRamesh Errabolu 185f4757347SAmber Lin while 
(dev->perf_props.next != &dev->perf_props) { 186f4757347SAmber Lin perf = container_of(dev->perf_props.next, 187f4757347SAmber Lin struct kfd_perf_properties, list); 188f4757347SAmber Lin list_del(&perf->list); 189f4757347SAmber Lin kfree(perf); 190f4757347SAmber Lin } 191f4757347SAmber Lin 1925b5c4e40SEvgeny Pinchuk kfree(dev); 1935b5c4e40SEvgeny Pinchuk } 1945b5c4e40SEvgeny Pinchuk 1954f449311SHarish Kasiviswanathan void kfd_release_topology_device_list(struct list_head *device_list) 1965b5c4e40SEvgeny Pinchuk { 1975b5c4e40SEvgeny Pinchuk struct kfd_topology_device *dev; 1985b5c4e40SEvgeny Pinchuk 1994f449311SHarish Kasiviswanathan while (!list_empty(device_list)) { 2004f449311SHarish Kasiviswanathan dev = list_first_entry(device_list, 2015b5c4e40SEvgeny Pinchuk struct kfd_topology_device, list); 2025b5c4e40SEvgeny Pinchuk kfd_release_topology_device(dev); 2035b5c4e40SEvgeny Pinchuk } 2044f449311SHarish Kasiviswanathan } 2055b5c4e40SEvgeny Pinchuk 2064f449311SHarish Kasiviswanathan static void kfd_release_live_view(void) 2074f449311SHarish Kasiviswanathan { 2084f449311SHarish Kasiviswanathan kfd_release_topology_device_list(&topology_device_list); 2095b5c4e40SEvgeny Pinchuk memset(&sys_props, 0, sizeof(sys_props)); 2105b5c4e40SEvgeny Pinchuk } 2115b5c4e40SEvgeny Pinchuk 2124f449311SHarish Kasiviswanathan struct kfd_topology_device *kfd_create_topology_device( 2134f449311SHarish Kasiviswanathan struct list_head *device_list) 2145b5c4e40SEvgeny Pinchuk { 2155b5c4e40SEvgeny Pinchuk struct kfd_topology_device *dev; 2165b5c4e40SEvgeny Pinchuk 2175b5c4e40SEvgeny Pinchuk dev = kfd_alloc_struct(dev); 2184eacc26bSKent Russell if (!dev) { 2195b5c4e40SEvgeny Pinchuk pr_err("No memory to allocate a topology device"); 22016b9201cSOded Gabbay return NULL; 2215b5c4e40SEvgeny Pinchuk } 2225b5c4e40SEvgeny Pinchuk 2235b5c4e40SEvgeny Pinchuk INIT_LIST_HEAD(&dev->mem_props); 2245b5c4e40SEvgeny Pinchuk INIT_LIST_HEAD(&dev->cache_props); 2255b5c4e40SEvgeny Pinchuk 
INIT_LIST_HEAD(&dev->io_link_props); 2260f28cca8SRamesh Errabolu INIT_LIST_HEAD(&dev->p2p_link_props); 227f4757347SAmber Lin INIT_LIST_HEAD(&dev->perf_props); 2285b5c4e40SEvgeny Pinchuk 2294f449311SHarish Kasiviswanathan list_add_tail(&dev->list, device_list); 2305b5c4e40SEvgeny Pinchuk 2315b5c4e40SEvgeny Pinchuk return dev; 2325b5c4e40SEvgeny Pinchuk } 2335b5c4e40SEvgeny Pinchuk 2345b5c4e40SEvgeny Pinchuk 23583a13ef5SFelix Kuehling #define sysfs_show_gen_prop(buffer, offs, fmt, ...) \ 23683a13ef5SFelix Kuehling (offs += snprintf(buffer+offs, PAGE_SIZE-offs, \ 23783a13ef5SFelix Kuehling fmt, __VA_ARGS__)) 23883a13ef5SFelix Kuehling #define sysfs_show_32bit_prop(buffer, offs, name, value) \ 23983a13ef5SFelix Kuehling sysfs_show_gen_prop(buffer, offs, "%s %u\n", name, value) 24083a13ef5SFelix Kuehling #define sysfs_show_64bit_prop(buffer, offs, name, value) \ 24183a13ef5SFelix Kuehling sysfs_show_gen_prop(buffer, offs, "%s %llu\n", name, value) 24283a13ef5SFelix Kuehling #define sysfs_show_32bit_val(buffer, offs, value) \ 24383a13ef5SFelix Kuehling sysfs_show_gen_prop(buffer, offs, "%u\n", value) 24483a13ef5SFelix Kuehling #define sysfs_show_str_val(buffer, offs, value) \ 24583a13ef5SFelix Kuehling sysfs_show_gen_prop(buffer, offs, "%s\n", value) 2465b5c4e40SEvgeny Pinchuk 2475b5c4e40SEvgeny Pinchuk static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr, 2485b5c4e40SEvgeny Pinchuk char *buffer) 2495b5c4e40SEvgeny Pinchuk { 25083a13ef5SFelix Kuehling int offs = 0; 2515b5c4e40SEvgeny Pinchuk 2525b5c4e40SEvgeny Pinchuk /* Making sure that the buffer is an empty string */ 2535b5c4e40SEvgeny Pinchuk buffer[0] = 0; 2545b5c4e40SEvgeny Pinchuk 2555b5c4e40SEvgeny Pinchuk if (attr == &sys_props.attr_genid) { 25683a13ef5SFelix Kuehling sysfs_show_32bit_val(buffer, offs, 25783a13ef5SFelix Kuehling sys_props.generation_count); 2585b5c4e40SEvgeny Pinchuk } else if (attr == &sys_props.attr_props) { 25983a13ef5SFelix Kuehling sysfs_show_64bit_prop(buffer, offs, 
"platform_oem", 2605b5c4e40SEvgeny Pinchuk sys_props.platform_oem); 26183a13ef5SFelix Kuehling sysfs_show_64bit_prop(buffer, offs, "platform_id", 2625b5c4e40SEvgeny Pinchuk sys_props.platform_id); 26383a13ef5SFelix Kuehling sysfs_show_64bit_prop(buffer, offs, "platform_rev", 2645b5c4e40SEvgeny Pinchuk sys_props.platform_rev); 2655b5c4e40SEvgeny Pinchuk } else { 26683a13ef5SFelix Kuehling offs = -EINVAL; 2675b5c4e40SEvgeny Pinchuk } 2685b5c4e40SEvgeny Pinchuk 26983a13ef5SFelix Kuehling return offs; 2705b5c4e40SEvgeny Pinchuk } 2715b5c4e40SEvgeny Pinchuk 2725108d768SYong Zhao static void kfd_topology_kobj_release(struct kobject *kobj) 2735108d768SYong Zhao { 2745108d768SYong Zhao kfree(kobj); 2755108d768SYong Zhao } 2765108d768SYong Zhao 2775b5c4e40SEvgeny Pinchuk static const struct sysfs_ops sysprops_ops = { 2785b5c4e40SEvgeny Pinchuk .show = sysprops_show, 2795b5c4e40SEvgeny Pinchuk }; 2805b5c4e40SEvgeny Pinchuk 2814fa01c63SThomas Weißschuh static const struct kobj_type sysprops_type = { 2825108d768SYong Zhao .release = kfd_topology_kobj_release, 2835b5c4e40SEvgeny Pinchuk .sysfs_ops = &sysprops_ops, 2845b5c4e40SEvgeny Pinchuk }; 2855b5c4e40SEvgeny Pinchuk 2865b5c4e40SEvgeny Pinchuk static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr, 2875b5c4e40SEvgeny Pinchuk char *buffer) 2885b5c4e40SEvgeny Pinchuk { 28983a13ef5SFelix Kuehling int offs = 0; 2905b5c4e40SEvgeny Pinchuk struct kfd_iolink_properties *iolink; 2915b5c4e40SEvgeny Pinchuk 2925b5c4e40SEvgeny Pinchuk /* Making sure that the buffer is an empty string */ 2935b5c4e40SEvgeny Pinchuk buffer[0] = 0; 2945b5c4e40SEvgeny Pinchuk 2955b5c4e40SEvgeny Pinchuk iolink = container_of(attr, struct kfd_iolink_properties, attr); 2966b855f7bSHarish Kasiviswanathan if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu)) 2976b855f7bSHarish Kasiviswanathan return -EPERM; 29883a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type); 29983a13ef5SFelix Kuehling 
sysfs_show_32bit_prop(buffer, offs, "version_major", iolink->ver_maj); 30083a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "version_minor", iolink->ver_min); 30183a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "node_from", iolink->node_from); 30283a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "node_to", iolink->node_to); 30383a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "weight", iolink->weight); 30483a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "min_latency", iolink->min_latency); 30583a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "max_latency", iolink->max_latency); 30683a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "min_bandwidth", 30783a13ef5SFelix Kuehling iolink->min_bandwidth); 30883a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "max_bandwidth", 30983a13ef5SFelix Kuehling iolink->max_bandwidth); 31083a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size", 3115b5c4e40SEvgeny Pinchuk iolink->rec_transfer_size); 31283a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags); 3135b5c4e40SEvgeny Pinchuk 31483a13ef5SFelix Kuehling return offs; 3155b5c4e40SEvgeny Pinchuk } 3165b5c4e40SEvgeny Pinchuk 3175b5c4e40SEvgeny Pinchuk static const struct sysfs_ops iolink_ops = { 3185b5c4e40SEvgeny Pinchuk .show = iolink_show, 3195b5c4e40SEvgeny Pinchuk }; 3205b5c4e40SEvgeny Pinchuk 3214fa01c63SThomas Weißschuh static const struct kobj_type iolink_type = { 3225108d768SYong Zhao .release = kfd_topology_kobj_release, 3235b5c4e40SEvgeny Pinchuk .sysfs_ops = &iolink_ops, 3245b5c4e40SEvgeny Pinchuk }; 3255b5c4e40SEvgeny Pinchuk 3265b5c4e40SEvgeny Pinchuk static ssize_t mem_show(struct kobject *kobj, struct attribute *attr, 3275b5c4e40SEvgeny Pinchuk char *buffer) 3285b5c4e40SEvgeny Pinchuk { 32983a13ef5SFelix Kuehling int offs = 0; 3305b5c4e40SEvgeny Pinchuk struct kfd_mem_properties *mem; 3315b5c4e40SEvgeny Pinchuk 
3325b5c4e40SEvgeny Pinchuk /* Making sure that the buffer is an empty string */ 3335b5c4e40SEvgeny Pinchuk buffer[0] = 0; 3345b5c4e40SEvgeny Pinchuk 3355b5c4e40SEvgeny Pinchuk mem = container_of(attr, struct kfd_mem_properties, attr); 3366b855f7bSHarish Kasiviswanathan if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu)) 3376b855f7bSHarish Kasiviswanathan return -EPERM; 33883a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type); 33983a13ef5SFelix Kuehling sysfs_show_64bit_prop(buffer, offs, "size_in_bytes", 34083a13ef5SFelix Kuehling mem->size_in_bytes); 34183a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "flags", mem->flags); 34283a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "width", mem->width); 34383a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "mem_clk_max", 34483a13ef5SFelix Kuehling mem->mem_clk_max); 3455b5c4e40SEvgeny Pinchuk 34683a13ef5SFelix Kuehling return offs; 3475b5c4e40SEvgeny Pinchuk } 3485b5c4e40SEvgeny Pinchuk 3495b5c4e40SEvgeny Pinchuk static const struct sysfs_ops mem_ops = { 3505b5c4e40SEvgeny Pinchuk .show = mem_show, 3515b5c4e40SEvgeny Pinchuk }; 3525b5c4e40SEvgeny Pinchuk 3534fa01c63SThomas Weißschuh static const struct kobj_type mem_type = { 3545108d768SYong Zhao .release = kfd_topology_kobj_release, 3555b5c4e40SEvgeny Pinchuk .sysfs_ops = &mem_ops, 3565b5c4e40SEvgeny Pinchuk }; 3575b5c4e40SEvgeny Pinchuk 3585b5c4e40SEvgeny Pinchuk static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, 3595b5c4e40SEvgeny Pinchuk char *buffer) 3605b5c4e40SEvgeny Pinchuk { 36183a13ef5SFelix Kuehling int offs = 0; 362bc0c75a3SHarish Kasiviswanathan uint32_t i, j; 3635b5c4e40SEvgeny Pinchuk struct kfd_cache_properties *cache; 3645b5c4e40SEvgeny Pinchuk 3655b5c4e40SEvgeny Pinchuk /* Making sure that the buffer is an empty string */ 3665b5c4e40SEvgeny Pinchuk buffer[0] = 0; 3675b5c4e40SEvgeny Pinchuk cache = container_of(attr, struct kfd_cache_properties, attr); 
3686b855f7bSHarish Kasiviswanathan if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) 3696b855f7bSHarish Kasiviswanathan return -EPERM; 37083a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "processor_id_low", 3715b5c4e40SEvgeny Pinchuk cache->processor_id_low); 37283a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level); 37383a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size); 37483a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "cache_line_size", 37583a13ef5SFelix Kuehling cache->cacheline_size); 37683a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "cache_lines_per_tag", 3775b5c4e40SEvgeny Pinchuk cache->cachelines_per_tag); 37883a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc); 37983a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency); 38083a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type); 381c0cc999fSMa Jun 38283a13ef5SFelix Kuehling offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map "); 383c0cc999fSMa Jun for (i = 0; i < cache->sibling_map_size; i++) 38483a13ef5SFelix Kuehling for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) 385bc0c75a3SHarish Kasiviswanathan /* Check each bit */ 38683a13ef5SFelix Kuehling offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,", 38783a13ef5SFelix Kuehling (cache->sibling_map[i] >> j) & 1); 38883a13ef5SFelix Kuehling 389bc0c75a3SHarish Kasiviswanathan /* Replace the last "," with end of line */ 39083a13ef5SFelix Kuehling buffer[offs-1] = '\n'; 39183a13ef5SFelix Kuehling return offs; 3925b5c4e40SEvgeny Pinchuk } 3935b5c4e40SEvgeny Pinchuk 3945b5c4e40SEvgeny Pinchuk static const struct sysfs_ops cache_ops = { 3955b5c4e40SEvgeny Pinchuk .show = kfd_cache_show, 3965b5c4e40SEvgeny Pinchuk }; 3975b5c4e40SEvgeny Pinchuk 3984fa01c63SThomas Weißschuh static const struct kobj_type cache_type = { 
3995108d768SYong Zhao .release = kfd_topology_kobj_release, 4005b5c4e40SEvgeny Pinchuk .sysfs_ops = &cache_ops, 4015b5c4e40SEvgeny Pinchuk }; 4025b5c4e40SEvgeny Pinchuk 403f4757347SAmber Lin /****** Sysfs of Performance Counters ******/ 404f4757347SAmber Lin 405f4757347SAmber Lin struct kfd_perf_attr { 406f4757347SAmber Lin struct kobj_attribute attr; 407f4757347SAmber Lin uint32_t data; 408f4757347SAmber Lin }; 409f4757347SAmber Lin 410f4757347SAmber Lin static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs, 411f4757347SAmber Lin char *buf) 412f4757347SAmber Lin { 41383a13ef5SFelix Kuehling int offs = 0; 414f4757347SAmber Lin struct kfd_perf_attr *attr; 415f4757347SAmber Lin 416f4757347SAmber Lin buf[0] = 0; 417f4757347SAmber Lin attr = container_of(attrs, struct kfd_perf_attr, attr); 418f4757347SAmber Lin if (!attr->data) /* invalid data for PMC */ 419f4757347SAmber Lin return 0; 420f4757347SAmber Lin else 42183a13ef5SFelix Kuehling return sysfs_show_32bit_val(buf, offs, attr->data); 422f4757347SAmber Lin } 423f4757347SAmber Lin 424f4757347SAmber Lin #define KFD_PERF_DESC(_name, _data) \ 425f4757347SAmber Lin { \ 426f4757347SAmber Lin .attr = __ATTR(_name, 0444, perf_show, NULL), \ 427f4757347SAmber Lin .data = _data, \ 428f4757347SAmber Lin } 429f4757347SAmber Lin 430f4757347SAmber Lin static struct kfd_perf_attr perf_attr_iommu[] = { 431f4757347SAmber Lin KFD_PERF_DESC(max_concurrent, 0), 432f4757347SAmber Lin KFD_PERF_DESC(num_counters, 0), 433f4757347SAmber Lin KFD_PERF_DESC(counter_ids, 0), 434f4757347SAmber Lin }; 435f4757347SAmber Lin /****************************************/ 436f4757347SAmber Lin 4375b5c4e40SEvgeny Pinchuk static ssize_t node_show(struct kobject *kobj, struct attribute *attr, 4385b5c4e40SEvgeny Pinchuk char *buffer) 4395b5c4e40SEvgeny Pinchuk { 44083a13ef5SFelix Kuehling int offs = 0; 4415b5c4e40SEvgeny Pinchuk struct kfd_topology_device *dev; 442f7c826adSAlexey Skidanov uint32_t log_max_watch_addr; 
4435b5c4e40SEvgeny Pinchuk 4445b5c4e40SEvgeny Pinchuk /* Making sure that the buffer is an empty string */ 4455b5c4e40SEvgeny Pinchuk buffer[0] = 0; 4465b5c4e40SEvgeny Pinchuk 4475b5c4e40SEvgeny Pinchuk if (strcmp(attr->name, "gpu_id") == 0) { 4485b5c4e40SEvgeny Pinchuk dev = container_of(attr, struct kfd_topology_device, 4495b5c4e40SEvgeny Pinchuk attr_gpuid); 4506b855f7bSHarish Kasiviswanathan if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) 4516b855f7bSHarish Kasiviswanathan return -EPERM; 45283a13ef5SFelix Kuehling return sysfs_show_32bit_val(buffer, offs, dev->gpu_id); 453f7c826adSAlexey Skidanov } 454f7c826adSAlexey Skidanov 455f7c826adSAlexey Skidanov if (strcmp(attr->name, "name") == 0) { 4565b5c4e40SEvgeny Pinchuk dev = container_of(attr, struct kfd_topology_device, 4575b5c4e40SEvgeny Pinchuk attr_name); 458c181159aSYong Zhao 4596b855f7bSHarish Kasiviswanathan if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) 4606b855f7bSHarish Kasiviswanathan return -EPERM; 46183a13ef5SFelix Kuehling return sysfs_show_str_val(buffer, offs, dev->node_props.name); 462f7c826adSAlexey Skidanov } 463f7c826adSAlexey Skidanov 4645b5c4e40SEvgeny Pinchuk dev = container_of(attr, struct kfd_topology_device, 4655b5c4e40SEvgeny Pinchuk attr_props); 4666b855f7bSHarish Kasiviswanathan if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu)) 4676b855f7bSHarish Kasiviswanathan return -EPERM; 46883a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count", 4695b5c4e40SEvgeny Pinchuk dev->node_props.cpu_cores_count); 47083a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "simd_count", 471f38f147aSMukul Joshi dev->gpu ? 
(dev->node_props.simd_count * 472*c4050ff1SLijo Lazar NUM_XCC(dev->gpu->xcc_mask)) : 0); 47383a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "mem_banks_count", 4745b5c4e40SEvgeny Pinchuk dev->node_props.mem_banks_count); 47583a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "caches_count", 4765b5c4e40SEvgeny Pinchuk dev->node_props.caches_count); 47783a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "io_links_count", 4785b5c4e40SEvgeny Pinchuk dev->node_props.io_links_count); 4790f28cca8SRamesh Errabolu sysfs_show_32bit_prop(buffer, offs, "p2p_links_count", 4800f28cca8SRamesh Errabolu dev->node_props.p2p_links_count); 48183a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base", 4825b5c4e40SEvgeny Pinchuk dev->node_props.cpu_core_id_base); 48383a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "simd_id_base", 4845b5c4e40SEvgeny Pinchuk dev->node_props.simd_id_base); 48583a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "max_waves_per_simd", 4865b5c4e40SEvgeny Pinchuk dev->node_props.max_waves_per_simd); 48783a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "lds_size_in_kb", 4885b5c4e40SEvgeny Pinchuk dev->node_props.lds_size_in_kb); 48983a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "gds_size_in_kb", 4905b5c4e40SEvgeny Pinchuk dev->node_props.gds_size_in_kb); 49183a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "num_gws", 49229e76462SOak Zeng dev->node_props.num_gws); 49383a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "wave_front_size", 4945b5c4e40SEvgeny Pinchuk dev->node_props.wave_front_size); 49583a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "array_count", 496f38f147aSMukul Joshi dev->gpu ? 
(dev->node_props.array_count * 497*c4050ff1SLijo Lazar NUM_XCC(dev->gpu->xcc_mask)) : 0); 49883a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine", 4995b5c4e40SEvgeny Pinchuk dev->node_props.simd_arrays_per_engine); 50083a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array", 5015b5c4e40SEvgeny Pinchuk dev->node_props.cu_per_simd_array); 50283a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "simd_per_cu", 5035b5c4e40SEvgeny Pinchuk dev->node_props.simd_per_cu); 50483a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "max_slots_scratch_cu", 5055b5c4e40SEvgeny Pinchuk dev->node_props.max_slots_scratch_cu); 5069d6fa9c7SGraham Sider sysfs_show_32bit_prop(buffer, offs, "gfx_target_version", 5079d6fa9c7SGraham Sider dev->node_props.gfx_target_version); 50883a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "vendor_id", 5095b5c4e40SEvgeny Pinchuk dev->node_props.vendor_id); 51083a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "device_id", 5115b5c4e40SEvgeny Pinchuk dev->node_props.device_id); 51283a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "location_id", 5135b5c4e40SEvgeny Pinchuk dev->node_props.location_id); 51483a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "domain", 5153e58e95aSOri Messinger dev->node_props.domain); 51683a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "drm_render_minor", 5177c9b7171SOak Zeng dev->node_props.drm_render_minor); 51883a13ef5SFelix Kuehling sysfs_show_64bit_prop(buffer, offs, "hive_id", 5190c1690e3SShaoyun Liu dev->node_props.hive_id); 52083a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "num_sdma_engines", 52114568cf6SOak Zeng dev->node_props.num_sdma_engines); 52283a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "num_sdma_xgmi_engines", 52314568cf6SOak Zeng dev->node_props.num_sdma_xgmi_engines); 52483a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "num_sdma_queues_per_engine", 
525bb71c74dSHuang Rui dev->node_props.num_sdma_queues_per_engine); 52683a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "num_cp_queues", 527f4feb9faSHuang Rui dev->node_props.num_cp_queues); 5285b5c4e40SEvgeny Pinchuk 5295b5c4e40SEvgeny Pinchuk if (dev->gpu) { 530f7c826adSAlexey Skidanov log_max_watch_addr = 5318dc1db31SMukul Joshi __ilog2_u32(dev->gpu->kfd->device_info.num_of_watch_points); 532f7c826adSAlexey Skidanov 533f7c826adSAlexey Skidanov if (log_max_watch_addr) { 534f7c826adSAlexey Skidanov dev->node_props.capability |= 535f7c826adSAlexey Skidanov HSA_CAP_WATCH_POINTS_SUPPORTED; 536f7c826adSAlexey Skidanov 537f7c826adSAlexey Skidanov dev->node_props.capability |= 538f7c826adSAlexey Skidanov ((log_max_watch_addr << 539f7c826adSAlexey Skidanov HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) & 540f7c826adSAlexey Skidanov HSA_CAP_WATCH_POINTS_TOTALBITS_MASK); 541f7c826adSAlexey Skidanov } 542f7c826adSAlexey Skidanov 5437eb0502aSGraham Sider if (dev->gpu->adev->asic_type == CHIP_TONGA) 544413e85d5SBen Goz dev->node_props.capability |= 545413e85d5SBen Goz HSA_CAP_AQL_QUEUE_DOUBLE_MAP; 546413e85d5SBen Goz 54783a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute", 5483a87177eSHarish Kasiviswanathan dev->node_props.max_engine_clk_fcompute); 54942e08c78SOded Gabbay 55083a13ef5SFelix Kuehling sysfs_show_64bit_prop(buffer, offs, "local_mem_size", 0ULL); 551f1386fbcSOded Gabbay 55283a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "fw_version", 5538dc1db31SMukul Joshi dev->gpu->kfd->mec_fw_version); 55483a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "capability", 555826f5de8SAlexey Skidanov dev->node_props.capability); 55683a13ef5SFelix Kuehling sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version", 5578dc1db31SMukul Joshi dev->gpu->kfd->sdma_fw_version); 55811964258SKent Russell sysfs_show_64bit_prop(buffer, offs, "unique_id", 55902274fc0SGraham Sider dev->gpu->adev->unique_id); 56074c5b85dSMukul Joshi 
sysfs_show_32bit_prop(buffer, offs, "num_xcc", 561*c4050ff1SLijo Lazar NUM_XCC(dev->gpu->xcc_mask)); 5625b5c4e40SEvgeny Pinchuk } 5635b5c4e40SEvgeny Pinchuk 56483a13ef5SFelix Kuehling return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute", 5655b5c4e40SEvgeny Pinchuk cpufreq_quick_get_max(0)/1000); 5665b5c4e40SEvgeny Pinchuk } 5675b5c4e40SEvgeny Pinchuk 5685b5c4e40SEvgeny Pinchuk static const struct sysfs_ops node_ops = { 5695b5c4e40SEvgeny Pinchuk .show = node_show, 5705b5c4e40SEvgeny Pinchuk }; 5715b5c4e40SEvgeny Pinchuk 5724fa01c63SThomas Weißschuh static const struct kobj_type node_type = { 5735108d768SYong Zhao .release = kfd_topology_kobj_release, 5745b5c4e40SEvgeny Pinchuk .sysfs_ops = &node_ops, 5755b5c4e40SEvgeny Pinchuk }; 5765b5c4e40SEvgeny Pinchuk 5775b5c4e40SEvgeny Pinchuk static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr) 5785b5c4e40SEvgeny Pinchuk { 5795b5c4e40SEvgeny Pinchuk sysfs_remove_file(kobj, attr); 5805b5c4e40SEvgeny Pinchuk kobject_del(kobj); 5815b5c4e40SEvgeny Pinchuk kobject_put(kobj); 5825b5c4e40SEvgeny Pinchuk } 5835b5c4e40SEvgeny Pinchuk 5845b5c4e40SEvgeny Pinchuk static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) 5855b5c4e40SEvgeny Pinchuk { 5860f28cca8SRamesh Errabolu struct kfd_iolink_properties *p2plink; 5875b5c4e40SEvgeny Pinchuk struct kfd_iolink_properties *iolink; 5885b5c4e40SEvgeny Pinchuk struct kfd_cache_properties *cache; 5895b5c4e40SEvgeny Pinchuk struct kfd_mem_properties *mem; 590f4757347SAmber Lin struct kfd_perf_properties *perf; 5915b5c4e40SEvgeny Pinchuk 5925b5c4e40SEvgeny Pinchuk if (dev->kobj_iolink) { 5935b5c4e40SEvgeny Pinchuk list_for_each_entry(iolink, &dev->io_link_props, list) 5945b5c4e40SEvgeny Pinchuk if (iolink->kobj) { 5955b5c4e40SEvgeny Pinchuk kfd_remove_sysfs_file(iolink->kobj, 5965b5c4e40SEvgeny Pinchuk &iolink->attr); 59716b9201cSOded Gabbay iolink->kobj = NULL; 5985b5c4e40SEvgeny Pinchuk } 5995b5c4e40SEvgeny Pinchuk 
kobject_del(dev->kobj_iolink); 6005b5c4e40SEvgeny Pinchuk kobject_put(dev->kobj_iolink); 60116b9201cSOded Gabbay dev->kobj_iolink = NULL; 6025b5c4e40SEvgeny Pinchuk } 6035b5c4e40SEvgeny Pinchuk 6040f28cca8SRamesh Errabolu if (dev->kobj_p2plink) { 6050f28cca8SRamesh Errabolu list_for_each_entry(p2plink, &dev->p2p_link_props, list) 6060f28cca8SRamesh Errabolu if (p2plink->kobj) { 6070f28cca8SRamesh Errabolu kfd_remove_sysfs_file(p2plink->kobj, 6080f28cca8SRamesh Errabolu &p2plink->attr); 6090f28cca8SRamesh Errabolu p2plink->kobj = NULL; 6100f28cca8SRamesh Errabolu } 6110f28cca8SRamesh Errabolu kobject_del(dev->kobj_p2plink); 6120f28cca8SRamesh Errabolu kobject_put(dev->kobj_p2plink); 6130f28cca8SRamesh Errabolu dev->kobj_p2plink = NULL; 6140f28cca8SRamesh Errabolu } 6150f28cca8SRamesh Errabolu 6165b5c4e40SEvgeny Pinchuk if (dev->kobj_cache) { 6175b5c4e40SEvgeny Pinchuk list_for_each_entry(cache, &dev->cache_props, list) 6185b5c4e40SEvgeny Pinchuk if (cache->kobj) { 6195b5c4e40SEvgeny Pinchuk kfd_remove_sysfs_file(cache->kobj, 6205b5c4e40SEvgeny Pinchuk &cache->attr); 62116b9201cSOded Gabbay cache->kobj = NULL; 6225b5c4e40SEvgeny Pinchuk } 6235b5c4e40SEvgeny Pinchuk kobject_del(dev->kobj_cache); 6245b5c4e40SEvgeny Pinchuk kobject_put(dev->kobj_cache); 62516b9201cSOded Gabbay dev->kobj_cache = NULL; 6265b5c4e40SEvgeny Pinchuk } 6275b5c4e40SEvgeny Pinchuk 6285b5c4e40SEvgeny Pinchuk if (dev->kobj_mem) { 6295b5c4e40SEvgeny Pinchuk list_for_each_entry(mem, &dev->mem_props, list) 6305b5c4e40SEvgeny Pinchuk if (mem->kobj) { 6315b5c4e40SEvgeny Pinchuk kfd_remove_sysfs_file(mem->kobj, &mem->attr); 63216b9201cSOded Gabbay mem->kobj = NULL; 6335b5c4e40SEvgeny Pinchuk } 6345b5c4e40SEvgeny Pinchuk kobject_del(dev->kobj_mem); 6355b5c4e40SEvgeny Pinchuk kobject_put(dev->kobj_mem); 63616b9201cSOded Gabbay dev->kobj_mem = NULL; 6375b5c4e40SEvgeny Pinchuk } 6385b5c4e40SEvgeny Pinchuk 639f4757347SAmber Lin if (dev->kobj_perf) { 640f4757347SAmber Lin list_for_each_entry(perf, 
&dev->perf_props, list) { 641f4757347SAmber Lin kfree(perf->attr_group); 642f4757347SAmber Lin perf->attr_group = NULL; 643f4757347SAmber Lin } 644f4757347SAmber Lin kobject_del(dev->kobj_perf); 645f4757347SAmber Lin kobject_put(dev->kobj_perf); 646f4757347SAmber Lin dev->kobj_perf = NULL; 647f4757347SAmber Lin } 648f4757347SAmber Lin 6495b5c4e40SEvgeny Pinchuk if (dev->kobj_node) { 6505b5c4e40SEvgeny Pinchuk sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid); 6515b5c4e40SEvgeny Pinchuk sysfs_remove_file(dev->kobj_node, &dev->attr_name); 6525b5c4e40SEvgeny Pinchuk sysfs_remove_file(dev->kobj_node, &dev->attr_props); 6535b5c4e40SEvgeny Pinchuk kobject_del(dev->kobj_node); 6545b5c4e40SEvgeny Pinchuk kobject_put(dev->kobj_node); 65516b9201cSOded Gabbay dev->kobj_node = NULL; 6565b5c4e40SEvgeny Pinchuk } 6575b5c4e40SEvgeny Pinchuk } 6585b5c4e40SEvgeny Pinchuk 6595b5c4e40SEvgeny Pinchuk static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, 6605b5c4e40SEvgeny Pinchuk uint32_t id) 6615b5c4e40SEvgeny Pinchuk { 6620f28cca8SRamesh Errabolu struct kfd_iolink_properties *p2plink; 6635b5c4e40SEvgeny Pinchuk struct kfd_iolink_properties *iolink; 6645b5c4e40SEvgeny Pinchuk struct kfd_cache_properties *cache; 6655b5c4e40SEvgeny Pinchuk struct kfd_mem_properties *mem; 666f4757347SAmber Lin struct kfd_perf_properties *perf; 6675b5c4e40SEvgeny Pinchuk int ret; 668f4757347SAmber Lin uint32_t i, num_attrs; 669f4757347SAmber Lin struct attribute **attrs; 6705b5c4e40SEvgeny Pinchuk 67132fa8219SFelix Kuehling if (WARN_ON(dev->kobj_node)) 67232fa8219SFelix Kuehling return -EEXIST; 67332fa8219SFelix Kuehling 6745b5c4e40SEvgeny Pinchuk /* 6755b5c4e40SEvgeny Pinchuk * Creating the sysfs folders 6765b5c4e40SEvgeny Pinchuk */ 6775b5c4e40SEvgeny Pinchuk dev->kobj_node = kfd_alloc_struct(dev->kobj_node); 6785b5c4e40SEvgeny Pinchuk if (!dev->kobj_node) 6795b5c4e40SEvgeny Pinchuk return -ENOMEM; 6805b5c4e40SEvgeny Pinchuk 6815b5c4e40SEvgeny Pinchuk ret = 
kobject_init_and_add(dev->kobj_node, &node_type, 6825b5c4e40SEvgeny Pinchuk sys_props.kobj_nodes, "%d", id); 68320eca012SQiushi Wu if (ret < 0) { 68420eca012SQiushi Wu kobject_put(dev->kobj_node); 6855b5c4e40SEvgeny Pinchuk return ret; 68620eca012SQiushi Wu } 6875b5c4e40SEvgeny Pinchuk 6885b5c4e40SEvgeny Pinchuk dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node); 6895b5c4e40SEvgeny Pinchuk if (!dev->kobj_mem) 6905b5c4e40SEvgeny Pinchuk return -ENOMEM; 6915b5c4e40SEvgeny Pinchuk 6925b5c4e40SEvgeny Pinchuk dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node); 6935b5c4e40SEvgeny Pinchuk if (!dev->kobj_cache) 6945b5c4e40SEvgeny Pinchuk return -ENOMEM; 6955b5c4e40SEvgeny Pinchuk 6965b5c4e40SEvgeny Pinchuk dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node); 6975b5c4e40SEvgeny Pinchuk if (!dev->kobj_iolink) 6985b5c4e40SEvgeny Pinchuk return -ENOMEM; 6995b5c4e40SEvgeny Pinchuk 7000f28cca8SRamesh Errabolu dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node); 7010f28cca8SRamesh Errabolu if (!dev->kobj_p2plink) 7020f28cca8SRamesh Errabolu return -ENOMEM; 7030f28cca8SRamesh Errabolu 704f4757347SAmber Lin dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node); 705f4757347SAmber Lin if (!dev->kobj_perf) 706f4757347SAmber Lin return -ENOMEM; 707f4757347SAmber Lin 7085b5c4e40SEvgeny Pinchuk /* 7095b5c4e40SEvgeny Pinchuk * Creating sysfs files for node properties 7105b5c4e40SEvgeny Pinchuk */ 7115b5c4e40SEvgeny Pinchuk dev->attr_gpuid.name = "gpu_id"; 7125b5c4e40SEvgeny Pinchuk dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE; 7135b5c4e40SEvgeny Pinchuk sysfs_attr_init(&dev->attr_gpuid); 7145b5c4e40SEvgeny Pinchuk dev->attr_name.name = "name"; 7155b5c4e40SEvgeny Pinchuk dev->attr_name.mode = KFD_SYSFS_FILE_MODE; 7165b5c4e40SEvgeny Pinchuk sysfs_attr_init(&dev->attr_name); 7175b5c4e40SEvgeny Pinchuk dev->attr_props.name = "properties"; 7185b5c4e40SEvgeny Pinchuk dev->attr_props.mode = KFD_SYSFS_FILE_MODE; 
7195b5c4e40SEvgeny Pinchuk sysfs_attr_init(&dev->attr_props); 7205b5c4e40SEvgeny Pinchuk ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid); 7215b5c4e40SEvgeny Pinchuk if (ret < 0) 7225b5c4e40SEvgeny Pinchuk return ret; 7235b5c4e40SEvgeny Pinchuk ret = sysfs_create_file(dev->kobj_node, &dev->attr_name); 7245b5c4e40SEvgeny Pinchuk if (ret < 0) 7255b5c4e40SEvgeny Pinchuk return ret; 7265b5c4e40SEvgeny Pinchuk ret = sysfs_create_file(dev->kobj_node, &dev->attr_props); 7275b5c4e40SEvgeny Pinchuk if (ret < 0) 7285b5c4e40SEvgeny Pinchuk return ret; 7295b5c4e40SEvgeny Pinchuk 7305b5c4e40SEvgeny Pinchuk i = 0; 7315b5c4e40SEvgeny Pinchuk list_for_each_entry(mem, &dev->mem_props, list) { 7325b5c4e40SEvgeny Pinchuk mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); 7335b5c4e40SEvgeny Pinchuk if (!mem->kobj) 7345b5c4e40SEvgeny Pinchuk return -ENOMEM; 7355b5c4e40SEvgeny Pinchuk ret = kobject_init_and_add(mem->kobj, &mem_type, 7365b5c4e40SEvgeny Pinchuk dev->kobj_mem, "%d", i); 73720eca012SQiushi Wu if (ret < 0) { 73820eca012SQiushi Wu kobject_put(mem->kobj); 7395b5c4e40SEvgeny Pinchuk return ret; 74020eca012SQiushi Wu } 7415b5c4e40SEvgeny Pinchuk 7425b5c4e40SEvgeny Pinchuk mem->attr.name = "properties"; 7435b5c4e40SEvgeny Pinchuk mem->attr.mode = KFD_SYSFS_FILE_MODE; 7445b5c4e40SEvgeny Pinchuk sysfs_attr_init(&mem->attr); 7455b5c4e40SEvgeny Pinchuk ret = sysfs_create_file(mem->kobj, &mem->attr); 7465b5c4e40SEvgeny Pinchuk if (ret < 0) 7475b5c4e40SEvgeny Pinchuk return ret; 7485b5c4e40SEvgeny Pinchuk i++; 7495b5c4e40SEvgeny Pinchuk } 7505b5c4e40SEvgeny Pinchuk 7515b5c4e40SEvgeny Pinchuk i = 0; 7525b5c4e40SEvgeny Pinchuk list_for_each_entry(cache, &dev->cache_props, list) { 7535b5c4e40SEvgeny Pinchuk cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); 7545b5c4e40SEvgeny Pinchuk if (!cache->kobj) 7555b5c4e40SEvgeny Pinchuk return -ENOMEM; 7565b5c4e40SEvgeny Pinchuk ret = kobject_init_and_add(cache->kobj, &cache_type, 7575b5c4e40SEvgeny Pinchuk dev->kobj_cache, 
"%d", i); 75820eca012SQiushi Wu if (ret < 0) { 75920eca012SQiushi Wu kobject_put(cache->kobj); 7605b5c4e40SEvgeny Pinchuk return ret; 76120eca012SQiushi Wu } 7625b5c4e40SEvgeny Pinchuk 7635b5c4e40SEvgeny Pinchuk cache->attr.name = "properties"; 7645b5c4e40SEvgeny Pinchuk cache->attr.mode = KFD_SYSFS_FILE_MODE; 7655b5c4e40SEvgeny Pinchuk sysfs_attr_init(&cache->attr); 7665b5c4e40SEvgeny Pinchuk ret = sysfs_create_file(cache->kobj, &cache->attr); 7675b5c4e40SEvgeny Pinchuk if (ret < 0) 7685b5c4e40SEvgeny Pinchuk return ret; 7695b5c4e40SEvgeny Pinchuk i++; 7705b5c4e40SEvgeny Pinchuk } 7715b5c4e40SEvgeny Pinchuk 7725b5c4e40SEvgeny Pinchuk i = 0; 7735b5c4e40SEvgeny Pinchuk list_for_each_entry(iolink, &dev->io_link_props, list) { 7745b5c4e40SEvgeny Pinchuk iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); 7755b5c4e40SEvgeny Pinchuk if (!iolink->kobj) 7765b5c4e40SEvgeny Pinchuk return -ENOMEM; 7775b5c4e40SEvgeny Pinchuk ret = kobject_init_and_add(iolink->kobj, &iolink_type, 7785b5c4e40SEvgeny Pinchuk dev->kobj_iolink, "%d", i); 77920eca012SQiushi Wu if (ret < 0) { 78020eca012SQiushi Wu kobject_put(iolink->kobj); 7815b5c4e40SEvgeny Pinchuk return ret; 78220eca012SQiushi Wu } 7835b5c4e40SEvgeny Pinchuk 7845b5c4e40SEvgeny Pinchuk iolink->attr.name = "properties"; 7855b5c4e40SEvgeny Pinchuk iolink->attr.mode = KFD_SYSFS_FILE_MODE; 7865b5c4e40SEvgeny Pinchuk sysfs_attr_init(&iolink->attr); 7875b5c4e40SEvgeny Pinchuk ret = sysfs_create_file(iolink->kobj, &iolink->attr); 7885b5c4e40SEvgeny Pinchuk if (ret < 0) 7895b5c4e40SEvgeny Pinchuk return ret; 7905b5c4e40SEvgeny Pinchuk i++; 7915b5c4e40SEvgeny Pinchuk } 7925b5c4e40SEvgeny Pinchuk 7930f28cca8SRamesh Errabolu i = 0; 7940f28cca8SRamesh Errabolu list_for_each_entry(p2plink, &dev->p2p_link_props, list) { 7950f28cca8SRamesh Errabolu p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); 7960f28cca8SRamesh Errabolu if (!p2plink->kobj) 7970f28cca8SRamesh Errabolu return -ENOMEM; 7980f28cca8SRamesh Errabolu ret = 
kobject_init_and_add(p2plink->kobj, &iolink_type, 7990f28cca8SRamesh Errabolu dev->kobj_p2plink, "%d", i); 8000f28cca8SRamesh Errabolu if (ret < 0) { 8010f28cca8SRamesh Errabolu kobject_put(p2plink->kobj); 8020f28cca8SRamesh Errabolu return ret; 8030f28cca8SRamesh Errabolu } 8040f28cca8SRamesh Errabolu 8050f28cca8SRamesh Errabolu p2plink->attr.name = "properties"; 8060f28cca8SRamesh Errabolu p2plink->attr.mode = KFD_SYSFS_FILE_MODE; 8071f9d1ff1SMukul Joshi sysfs_attr_init(&p2plink->attr); 8080f28cca8SRamesh Errabolu ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); 8090f28cca8SRamesh Errabolu if (ret < 0) 8100f28cca8SRamesh Errabolu return ret; 8110f28cca8SRamesh Errabolu i++; 8120f28cca8SRamesh Errabolu } 8130f28cca8SRamesh Errabolu 814f4757347SAmber Lin /* All hardware blocks have the same number of attributes. */ 8153f866f5fSGustavo A. R. Silva num_attrs = ARRAY_SIZE(perf_attr_iommu); 816f4757347SAmber Lin list_for_each_entry(perf, &dev->perf_props, list) { 817f4757347SAmber Lin perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) 818f4757347SAmber Lin * num_attrs + sizeof(struct attribute_group), 819f4757347SAmber Lin GFP_KERNEL); 820f4757347SAmber Lin if (!perf->attr_group) 821f4757347SAmber Lin return -ENOMEM; 822f4757347SAmber Lin 823f4757347SAmber Lin attrs = (struct attribute **)(perf->attr_group + 1); 824f4757347SAmber Lin if (!strcmp(perf->block_name, "iommu")) { 825f4757347SAmber Lin /* Information of IOMMU's num_counters and counter_ids is shown 826f4757347SAmber Lin * under /sys/bus/event_source/devices/amd_iommu. We don't 827f4757347SAmber Lin * duplicate here. 
828f4757347SAmber Lin */ 829f4757347SAmber Lin perf_attr_iommu[0].data = perf->max_concurrent; 830f4757347SAmber Lin for (i = 0; i < num_attrs; i++) 831f4757347SAmber Lin attrs[i] = &perf_attr_iommu[i].attr.attr; 832f4757347SAmber Lin } 833f4757347SAmber Lin perf->attr_group->name = perf->block_name; 834f4757347SAmber Lin perf->attr_group->attrs = attrs; 835f4757347SAmber Lin ret = sysfs_create_group(dev->kobj_perf, perf->attr_group); 836f4757347SAmber Lin if (ret < 0) 837f4757347SAmber Lin return ret; 838f4757347SAmber Lin } 839f4757347SAmber Lin 8405b5c4e40SEvgeny Pinchuk return 0; 8415b5c4e40SEvgeny Pinchuk } 8425b5c4e40SEvgeny Pinchuk 8433a87177eSHarish Kasiviswanathan /* Called with write topology lock acquired */ 8445b5c4e40SEvgeny Pinchuk static int kfd_build_sysfs_node_tree(void) 8455b5c4e40SEvgeny Pinchuk { 8465b5c4e40SEvgeny Pinchuk struct kfd_topology_device *dev; 8475b5c4e40SEvgeny Pinchuk int ret; 8485b5c4e40SEvgeny Pinchuk uint32_t i = 0; 8495b5c4e40SEvgeny Pinchuk 8505b5c4e40SEvgeny Pinchuk list_for_each_entry(dev, &topology_device_list, list) { 8518dfead6cSBen Goz ret = kfd_build_sysfs_node_entry(dev, i); 8525b5c4e40SEvgeny Pinchuk if (ret < 0) 8535b5c4e40SEvgeny Pinchuk return ret; 8545b5c4e40SEvgeny Pinchuk i++; 8555b5c4e40SEvgeny Pinchuk } 8565b5c4e40SEvgeny Pinchuk 8575b5c4e40SEvgeny Pinchuk return 0; 8585b5c4e40SEvgeny Pinchuk } 8595b5c4e40SEvgeny Pinchuk 8603a87177eSHarish Kasiviswanathan /* Called with write topology lock acquired */ 8615b5c4e40SEvgeny Pinchuk static void kfd_remove_sysfs_node_tree(void) 8625b5c4e40SEvgeny Pinchuk { 8635b5c4e40SEvgeny Pinchuk struct kfd_topology_device *dev; 8645b5c4e40SEvgeny Pinchuk 8655b5c4e40SEvgeny Pinchuk list_for_each_entry(dev, &topology_device_list, list) 8665b5c4e40SEvgeny Pinchuk kfd_remove_sysfs_node_entry(dev); 8675b5c4e40SEvgeny Pinchuk } 8685b5c4e40SEvgeny Pinchuk 8695b5c4e40SEvgeny Pinchuk static int kfd_topology_update_sysfs(void) 8705b5c4e40SEvgeny Pinchuk { 8715b5c4e40SEvgeny Pinchuk int 
ret; 8725b5c4e40SEvgeny Pinchuk 8734eacc26bSKent Russell if (!sys_props.kobj_topology) { 8745b5c4e40SEvgeny Pinchuk sys_props.kobj_topology = 8755b5c4e40SEvgeny Pinchuk kfd_alloc_struct(sys_props.kobj_topology); 8765b5c4e40SEvgeny Pinchuk if (!sys_props.kobj_topology) 8775b5c4e40SEvgeny Pinchuk return -ENOMEM; 8785b5c4e40SEvgeny Pinchuk 8795b5c4e40SEvgeny Pinchuk ret = kobject_init_and_add(sys_props.kobj_topology, 8805b5c4e40SEvgeny Pinchuk &sysprops_type, &kfd_device->kobj, 8815b5c4e40SEvgeny Pinchuk "topology"); 88220eca012SQiushi Wu if (ret < 0) { 88320eca012SQiushi Wu kobject_put(sys_props.kobj_topology); 8845b5c4e40SEvgeny Pinchuk return ret; 88520eca012SQiushi Wu } 8865b5c4e40SEvgeny Pinchuk 8875b5c4e40SEvgeny Pinchuk sys_props.kobj_nodes = kobject_create_and_add("nodes", 8885b5c4e40SEvgeny Pinchuk sys_props.kobj_topology); 8895b5c4e40SEvgeny Pinchuk if (!sys_props.kobj_nodes) 8905b5c4e40SEvgeny Pinchuk return -ENOMEM; 8915b5c4e40SEvgeny Pinchuk 8925b5c4e40SEvgeny Pinchuk sys_props.attr_genid.name = "generation_id"; 8935b5c4e40SEvgeny Pinchuk sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE; 8945b5c4e40SEvgeny Pinchuk sysfs_attr_init(&sys_props.attr_genid); 8955b5c4e40SEvgeny Pinchuk ret = sysfs_create_file(sys_props.kobj_topology, 8965b5c4e40SEvgeny Pinchuk &sys_props.attr_genid); 8975b5c4e40SEvgeny Pinchuk if (ret < 0) 8985b5c4e40SEvgeny Pinchuk return ret; 8995b5c4e40SEvgeny Pinchuk 9005b5c4e40SEvgeny Pinchuk sys_props.attr_props.name = "system_properties"; 9015b5c4e40SEvgeny Pinchuk sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE; 9025b5c4e40SEvgeny Pinchuk sysfs_attr_init(&sys_props.attr_props); 9035b5c4e40SEvgeny Pinchuk ret = sysfs_create_file(sys_props.kobj_topology, 9045b5c4e40SEvgeny Pinchuk &sys_props.attr_props); 9055b5c4e40SEvgeny Pinchuk if (ret < 0) 9065b5c4e40SEvgeny Pinchuk return ret; 9075b5c4e40SEvgeny Pinchuk } 9085b5c4e40SEvgeny Pinchuk 9095b5c4e40SEvgeny Pinchuk kfd_remove_sysfs_node_tree(); 9105b5c4e40SEvgeny Pinchuk 9115b5c4e40SEvgeny 
Pinchuk return kfd_build_sysfs_node_tree(); 9125b5c4e40SEvgeny Pinchuk } 9135b5c4e40SEvgeny Pinchuk 9145b5c4e40SEvgeny Pinchuk static void kfd_topology_release_sysfs(void) 9155b5c4e40SEvgeny Pinchuk { 9165b5c4e40SEvgeny Pinchuk kfd_remove_sysfs_node_tree(); 9175b5c4e40SEvgeny Pinchuk if (sys_props.kobj_topology) { 9185b5c4e40SEvgeny Pinchuk sysfs_remove_file(sys_props.kobj_topology, 9195b5c4e40SEvgeny Pinchuk &sys_props.attr_genid); 9205b5c4e40SEvgeny Pinchuk sysfs_remove_file(sys_props.kobj_topology, 9215b5c4e40SEvgeny Pinchuk &sys_props.attr_props); 9225b5c4e40SEvgeny Pinchuk if (sys_props.kobj_nodes) { 9235b5c4e40SEvgeny Pinchuk kobject_del(sys_props.kobj_nodes); 9245b5c4e40SEvgeny Pinchuk kobject_put(sys_props.kobj_nodes); 92516b9201cSOded Gabbay sys_props.kobj_nodes = NULL; 9265b5c4e40SEvgeny Pinchuk } 9275b5c4e40SEvgeny Pinchuk kobject_del(sys_props.kobj_topology); 9285b5c4e40SEvgeny Pinchuk kobject_put(sys_props.kobj_topology); 92916b9201cSOded Gabbay sys_props.kobj_topology = NULL; 9305b5c4e40SEvgeny Pinchuk } 9315b5c4e40SEvgeny Pinchuk } 9325b5c4e40SEvgeny Pinchuk 9334f449311SHarish Kasiviswanathan /* Called with write topology_lock acquired */ 9344f449311SHarish Kasiviswanathan static void kfd_topology_update_device_list(struct list_head *temp_list, 9354f449311SHarish Kasiviswanathan struct list_head *master_list) 9364f449311SHarish Kasiviswanathan { 9374f449311SHarish Kasiviswanathan while (!list_empty(temp_list)) { 9384f449311SHarish Kasiviswanathan list_move_tail(temp_list->next, master_list); 9394f449311SHarish Kasiviswanathan sys_props.num_devices++; 9404f449311SHarish Kasiviswanathan } 9414f449311SHarish Kasiviswanathan } 9424f449311SHarish Kasiviswanathan 943520b8fb7SFelix Kuehling static void kfd_debug_print_topology(void) 944520b8fb7SFelix Kuehling { 945520b8fb7SFelix Kuehling struct kfd_topology_device *dev; 946520b8fb7SFelix Kuehling 947520b8fb7SFelix Kuehling down_read(&topology_lock); 948520b8fb7SFelix Kuehling 949520b8fb7SFelix Kuehling dev 
= list_last_entry(&topology_device_list, 950520b8fb7SFelix Kuehling struct kfd_topology_device, list); 951520b8fb7SFelix Kuehling if (dev) { 952520b8fb7SFelix Kuehling if (dev->node_props.cpu_cores_count && 953520b8fb7SFelix Kuehling dev->node_props.simd_count) { 954520b8fb7SFelix Kuehling pr_info("Topology: Add APU node [0x%0x:0x%0x]\n", 955520b8fb7SFelix Kuehling dev->node_props.device_id, 956520b8fb7SFelix Kuehling dev->node_props.vendor_id); 957520b8fb7SFelix Kuehling } else if (dev->node_props.cpu_cores_count) 958520b8fb7SFelix Kuehling pr_info("Topology: Add CPU node\n"); 959520b8fb7SFelix Kuehling else if (dev->node_props.simd_count) 960520b8fb7SFelix Kuehling pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n", 961520b8fb7SFelix Kuehling dev->node_props.device_id, 962520b8fb7SFelix Kuehling dev->node_props.vendor_id); 963520b8fb7SFelix Kuehling } 964520b8fb7SFelix Kuehling up_read(&topology_lock); 965520b8fb7SFelix Kuehling } 966520b8fb7SFelix Kuehling 967520b8fb7SFelix Kuehling /* Helper function for intializing platform_xx members of 968520b8fb7SFelix Kuehling * kfd_system_properties. Uses OEM info from the last CPU/APU node. 
969520b8fb7SFelix Kuehling */ 970520b8fb7SFelix Kuehling static void kfd_update_system_properties(void) 971520b8fb7SFelix Kuehling { 972520b8fb7SFelix Kuehling struct kfd_topology_device *dev; 973520b8fb7SFelix Kuehling 974520b8fb7SFelix Kuehling down_read(&topology_lock); 975520b8fb7SFelix Kuehling dev = list_last_entry(&topology_device_list, 976520b8fb7SFelix Kuehling struct kfd_topology_device, list); 977520b8fb7SFelix Kuehling if (dev) { 978520b8fb7SFelix Kuehling sys_props.platform_id = 979520b8fb7SFelix Kuehling (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK; 980520b8fb7SFelix Kuehling sys_props.platform_oem = *((uint64_t *)dev->oem_table_id); 981520b8fb7SFelix Kuehling sys_props.platform_rev = dev->oem_revision; 982520b8fb7SFelix Kuehling } 983520b8fb7SFelix Kuehling up_read(&topology_lock); 984520b8fb7SFelix Kuehling } 985520b8fb7SFelix Kuehling 986520b8fb7SFelix Kuehling static void find_system_memory(const struct dmi_header *dm, 987520b8fb7SFelix Kuehling void *private) 988520b8fb7SFelix Kuehling { 989520b8fb7SFelix Kuehling struct kfd_mem_properties *mem; 990520b8fb7SFelix Kuehling u16 mem_width, mem_clock; 991520b8fb7SFelix Kuehling struct kfd_topology_device *kdev = 992520b8fb7SFelix Kuehling (struct kfd_topology_device *)private; 993520b8fb7SFelix Kuehling const u8 *dmi_data = (const u8 *)(dm + 1); 994520b8fb7SFelix Kuehling 995520b8fb7SFelix Kuehling if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) { 996520b8fb7SFelix Kuehling mem_width = (u16)(*(const u16 *)(dmi_data + 0x6)); 997520b8fb7SFelix Kuehling mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11)); 998520b8fb7SFelix Kuehling list_for_each_entry(mem, &kdev->mem_props, list) { 999520b8fb7SFelix Kuehling if (mem_width != 0xFFFF && mem_width != 0) 1000520b8fb7SFelix Kuehling mem->width = mem_width; 1001520b8fb7SFelix Kuehling if (mem_clock != 0) 1002520b8fb7SFelix Kuehling mem->mem_clk_max = mem_clock; 1003520b8fb7SFelix Kuehling } 1004520b8fb7SFelix Kuehling } 1005520b8fb7SFelix 
Kuehling } 1006f4757347SAmber Lin 1007f4757347SAmber Lin /* 1008f4757347SAmber Lin * Performance counters information is not part of CRAT but we would like to 1009f4757347SAmber Lin * put them in the sysfs under topology directory for Thunk to get the data. 1010f4757347SAmber Lin * This function is called before updating the sysfs. 1011f4757347SAmber Lin */ 1012f4757347SAmber Lin static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev) 1013f4757347SAmber Lin { 101464d1c3a4SFelix Kuehling /* These are the only counters supported so far */ 101564d1c3a4SFelix Kuehling return kfd_iommu_add_perf_counters(kdev); 1016f4757347SAmber Lin } 1017f4757347SAmber Lin 1018520b8fb7SFelix Kuehling /* kfd_add_non_crat_information - Add information that is not currently 1019520b8fb7SFelix Kuehling * defined in CRAT but is necessary for KFD topology 1020520b8fb7SFelix Kuehling * @dev - topology device to which addition info is added 1021520b8fb7SFelix Kuehling */ 1022520b8fb7SFelix Kuehling static void kfd_add_non_crat_information(struct kfd_topology_device *kdev) 1023520b8fb7SFelix Kuehling { 1024520b8fb7SFelix Kuehling /* Check if CPU only node. */ 1025520b8fb7SFelix Kuehling if (!kdev->gpu) { 1026520b8fb7SFelix Kuehling /* Add system memory information */ 1027520b8fb7SFelix Kuehling dmi_walk(find_system_memory, kdev); 1028520b8fb7SFelix Kuehling } 1029520b8fb7SFelix Kuehling /* TODO: For GPU node, rearrange code from kfd_topology_add_device */ 1030520b8fb7SFelix Kuehling } 1031520b8fb7SFelix Kuehling 1032b441093eSHarish Kasiviswanathan /* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices. 1033b441093eSHarish Kasiviswanathan * Ignore CRAT for all other devices. AMD APU is identified if both CPU 1034b441093eSHarish Kasiviswanathan * and GPU cores are present. 1035b441093eSHarish Kasiviswanathan * @device_list - topology device list created by parsing ACPI CRAT table. 
1036b441093eSHarish Kasiviswanathan * @return - TRUE if invalid, FALSE is valid. 1037b441093eSHarish Kasiviswanathan */ 1038b441093eSHarish Kasiviswanathan static bool kfd_is_acpi_crat_invalid(struct list_head *device_list) 1039b441093eSHarish Kasiviswanathan { 1040b441093eSHarish Kasiviswanathan struct kfd_topology_device *dev; 1041b441093eSHarish Kasiviswanathan 1042b441093eSHarish Kasiviswanathan list_for_each_entry(dev, device_list, list) { 1043b441093eSHarish Kasiviswanathan if (dev->node_props.cpu_cores_count && 1044b441093eSHarish Kasiviswanathan dev->node_props.simd_count) 1045b441093eSHarish Kasiviswanathan return false; 1046b441093eSHarish Kasiviswanathan } 1047b441093eSHarish Kasiviswanathan pr_info("Ignoring ACPI CRAT on non-APU system\n"); 1048b441093eSHarish Kasiviswanathan return true; 1049b441093eSHarish Kasiviswanathan } 1050b441093eSHarish Kasiviswanathan 10515b5c4e40SEvgeny Pinchuk int kfd_topology_init(void) 10525b5c4e40SEvgeny Pinchuk { 105316b9201cSOded Gabbay void *crat_image = NULL; 10545b5c4e40SEvgeny Pinchuk size_t image_size = 0; 10555b5c4e40SEvgeny Pinchuk int ret; 10564f449311SHarish Kasiviswanathan struct list_head temp_topology_device_list; 1057520b8fb7SFelix Kuehling int cpu_only_node = 0; 1058520b8fb7SFelix Kuehling struct kfd_topology_device *kdev; 1059520b8fb7SFelix Kuehling int proximity_domain; 10605b5c4e40SEvgeny Pinchuk 10614f449311SHarish Kasiviswanathan /* topology_device_list - Master list of all topology devices 10624f449311SHarish Kasiviswanathan * temp_topology_device_list - temporary list created while parsing CRAT 10634f449311SHarish Kasiviswanathan * or VCRAT. 
Once parsing is complete the contents of list is moved to 10644f449311SHarish Kasiviswanathan * topology_device_list 10655b5c4e40SEvgeny Pinchuk */ 10664f449311SHarish Kasiviswanathan 10674f449311SHarish Kasiviswanathan /* Initialize the head for the both the lists */ 10685b5c4e40SEvgeny Pinchuk INIT_LIST_HEAD(&topology_device_list); 10694f449311SHarish Kasiviswanathan INIT_LIST_HEAD(&temp_topology_device_list); 10705b5c4e40SEvgeny Pinchuk init_rwsem(&topology_lock); 10715b5c4e40SEvgeny Pinchuk 10725b5c4e40SEvgeny Pinchuk memset(&sys_props, 0, sizeof(sys_props)); 10735b5c4e40SEvgeny Pinchuk 1074520b8fb7SFelix Kuehling /* Proximity domains in ACPI CRAT tables start counting at 1075520b8fb7SFelix Kuehling * 0. The same should be true for virtual CRAT tables created 1076520b8fb7SFelix Kuehling * at this stage. GPUs added later in kfd_topology_add_device 1077520b8fb7SFelix Kuehling * use a counter. 1078520b8fb7SFelix Kuehling */ 1079520b8fb7SFelix Kuehling proximity_domain = 0; 1080520b8fb7SFelix Kuehling 10815b5c4e40SEvgeny Pinchuk /* 1082520b8fb7SFelix Kuehling * Get the CRAT image from the ACPI. If ACPI doesn't have one 1083b441093eSHarish Kasiviswanathan * or if ACPI CRAT is invalid create a virtual CRAT. 1084520b8fb7SFelix Kuehling * NOTE: The current implementation expects all AMD APUs to have 1085520b8fb7SFelix Kuehling * CRAT. 
If no CRAT is available, it is assumed to be a CPU 10865b5c4e40SEvgeny Pinchuk */ 10878e05247dSHarish Kasiviswanathan ret = kfd_create_crat_image_acpi(&crat_image, &image_size); 10888e05247dSHarish Kasiviswanathan if (!ret) { 10894f449311SHarish Kasiviswanathan ret = kfd_parse_crat_table(crat_image, 1090520b8fb7SFelix Kuehling &temp_topology_device_list, 1091520b8fb7SFelix Kuehling proximity_domain); 1092b441093eSHarish Kasiviswanathan if (ret || 1093b441093eSHarish Kasiviswanathan kfd_is_acpi_crat_invalid(&temp_topology_device_list)) { 1094520b8fb7SFelix Kuehling kfd_release_topology_device_list( 1095520b8fb7SFelix Kuehling &temp_topology_device_list); 1096520b8fb7SFelix Kuehling kfd_destroy_crat_image(crat_image); 1097520b8fb7SFelix Kuehling crat_image = NULL; 1098520b8fb7SFelix Kuehling } 1099520b8fb7SFelix Kuehling } 1100520b8fb7SFelix Kuehling 1101520b8fb7SFelix Kuehling if (!crat_image) { 1102520b8fb7SFelix Kuehling ret = kfd_create_crat_image_virtual(&crat_image, &image_size, 1103520b8fb7SFelix Kuehling COMPUTE_UNIT_CPU, NULL, 1104520b8fb7SFelix Kuehling proximity_domain); 1105520b8fb7SFelix Kuehling cpu_only_node = 1; 1106520b8fb7SFelix Kuehling if (ret) { 1107520b8fb7SFelix Kuehling pr_err("Error creating VCRAT table for CPU\n"); 1108520b8fb7SFelix Kuehling return ret; 1109520b8fb7SFelix Kuehling } 1110520b8fb7SFelix Kuehling 1111520b8fb7SFelix Kuehling ret = kfd_parse_crat_table(crat_image, 1112520b8fb7SFelix Kuehling &temp_topology_device_list, 1113520b8fb7SFelix Kuehling proximity_domain); 1114520b8fb7SFelix Kuehling if (ret) { 1115520b8fb7SFelix Kuehling pr_err("Error parsing VCRAT table for CPU\n"); 11168e05247dSHarish Kasiviswanathan goto err; 1117520b8fb7SFelix Kuehling } 11185b5c4e40SEvgeny Pinchuk } 11195b5c4e40SEvgeny Pinchuk 1120f4757347SAmber Lin kdev = list_first_entry(&temp_topology_device_list, 1121f4757347SAmber Lin struct kfd_topology_device, list); 1122f4757347SAmber Lin kfd_add_perf_to_topology(kdev); 1123f4757347SAmber Lin 
11245b5c4e40SEvgeny Pinchuk down_write(&topology_lock); 11254f449311SHarish Kasiviswanathan kfd_topology_update_device_list(&temp_topology_device_list, 11264f449311SHarish Kasiviswanathan &topology_device_list); 112746d18d51SMukul Joshi topology_crat_proximity_domain = sys_props.num_devices-1; 11285b5c4e40SEvgeny Pinchuk ret = kfd_topology_update_sysfs(); 11295b5c4e40SEvgeny Pinchuk up_write(&topology_lock); 11308e05247dSHarish Kasiviswanathan 11314f449311SHarish Kasiviswanathan if (!ret) { 11324f449311SHarish Kasiviswanathan sys_props.generation_count++; 1133520b8fb7SFelix Kuehling kfd_update_system_properties(); 1134520b8fb7SFelix Kuehling kfd_debug_print_topology(); 11354f449311SHarish Kasiviswanathan } else 11368e05247dSHarish Kasiviswanathan pr_err("Failed to update topology in sysfs ret=%d\n", ret); 11375b5c4e40SEvgeny Pinchuk 1138520b8fb7SFelix Kuehling /* For nodes with GPU, this information gets added 1139520b8fb7SFelix Kuehling * when GPU is detected (kfd_topology_add_device). 
1140520b8fb7SFelix Kuehling */ 1141520b8fb7SFelix Kuehling if (cpu_only_node) { 1142520b8fb7SFelix Kuehling /* Add additional information to CPU only node created above */ 1143520b8fb7SFelix Kuehling down_write(&topology_lock); 1144520b8fb7SFelix Kuehling kdev = list_first_entry(&topology_device_list, 1145520b8fb7SFelix Kuehling struct kfd_topology_device, list); 1146520b8fb7SFelix Kuehling up_write(&topology_lock); 1147520b8fb7SFelix Kuehling kfd_add_non_crat_information(kdev); 1148520b8fb7SFelix Kuehling } 1149520b8fb7SFelix Kuehling 11505b5c4e40SEvgeny Pinchuk err: 11518e05247dSHarish Kasiviswanathan kfd_destroy_crat_image(crat_image); 11525b5c4e40SEvgeny Pinchuk return ret; 11535b5c4e40SEvgeny Pinchuk } 11545b5c4e40SEvgeny Pinchuk 11555b5c4e40SEvgeny Pinchuk void kfd_topology_shutdown(void) 11565b5c4e40SEvgeny Pinchuk { 11574f449311SHarish Kasiviswanathan down_write(&topology_lock); 11585b5c4e40SEvgeny Pinchuk kfd_topology_release_sysfs(); 11595b5c4e40SEvgeny Pinchuk kfd_release_live_view(); 11604f449311SHarish Kasiviswanathan up_write(&topology_lock); 11615b5c4e40SEvgeny Pinchuk } 11625b5c4e40SEvgeny Pinchuk 11638dc1db31SMukul Joshi static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) 11645b5c4e40SEvgeny Pinchuk { 11655b5c4e40SEvgeny Pinchuk uint32_t hashout; 116674c5b85dSMukul Joshi uint32_t buf[8]; 1167585f0e6cSEdward O'Callaghan uint64_t local_mem_size; 11685b5c4e40SEvgeny Pinchuk int i; 11695b5c4e40SEvgeny Pinchuk 11705b5c4e40SEvgeny Pinchuk if (!gpu) 11715b5c4e40SEvgeny Pinchuk return 0; 11725b5c4e40SEvgeny Pinchuk 11738dc1db31SMukul Joshi local_mem_size = gpu->kfd->local_mem_info.local_mem_size_private + 11748dc1db31SMukul Joshi gpu->kfd->local_mem_info.local_mem_size_public; 1175d69a3b76SMukul Joshi buf[0] = gpu->adev->pdev->devfn; 1176d69a3b76SMukul Joshi buf[1] = gpu->adev->pdev->subsystem_vendor | 1177d69a3b76SMukul Joshi (gpu->adev->pdev->subsystem_device << 16); 1178d69a3b76SMukul Joshi buf[2] = pci_domain_nr(gpu->adev->pdev->bus); 
1179d69a3b76SMukul Joshi buf[3] = gpu->adev->pdev->device; 1180d69a3b76SMukul Joshi buf[4] = gpu->adev->pdev->bus->number; 1181585f0e6cSEdward O'Callaghan buf[5] = lower_32_bits(local_mem_size); 1182585f0e6cSEdward O'Callaghan buf[6] = upper_32_bits(local_mem_size); 1183*c4050ff1SLijo Lazar buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16); 11845b5c4e40SEvgeny Pinchuk 118574c5b85dSMukul Joshi for (i = 0, hashout = 0; i < 8; i++) 11865b5c4e40SEvgeny Pinchuk hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH); 11875b5c4e40SEvgeny Pinchuk 11885b5c4e40SEvgeny Pinchuk return hashout; 11895b5c4e40SEvgeny Pinchuk } 11903a87177eSHarish Kasiviswanathan /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If 11913a87177eSHarish Kasiviswanathan * the GPU device is not already present in the topology device 11923a87177eSHarish Kasiviswanathan * list then return NULL. This means a new topology device has to 11933a87177eSHarish Kasiviswanathan * be created for this GPU. 11943a87177eSHarish Kasiviswanathan */ 11958dc1db31SMukul Joshi static struct kfd_topology_device *kfd_assign_gpu(struct kfd_node *gpu) 11965b5c4e40SEvgeny Pinchuk { 11975b5c4e40SEvgeny Pinchuk struct kfd_topology_device *dev; 119816b9201cSOded Gabbay struct kfd_topology_device *out_dev = NULL; 1199171bc67eSHarish Kasiviswanathan struct kfd_mem_properties *mem; 1200171bc67eSHarish Kasiviswanathan struct kfd_cache_properties *cache; 1201171bc67eSHarish Kasiviswanathan struct kfd_iolink_properties *iolink; 12020f28cca8SRamesh Errabolu struct kfd_iolink_properties *p2plink; 12035b5c4e40SEvgeny Pinchuk 1204b8fe0524SFelix Kuehling list_for_each_entry(dev, &topology_device_list, list) { 1205b8fe0524SFelix Kuehling /* Discrete GPUs need their own topology device list 1206b8fe0524SFelix Kuehling * entries. Don't assign them to CPU/APU nodes. 
1207b8fe0524SFelix Kuehling */ 12088dc1db31SMukul Joshi if (!gpu->kfd->use_iommu_v2 && 1209b8fe0524SFelix Kuehling dev->node_props.cpu_cores_count) 1210b8fe0524SFelix Kuehling continue; 1211b8fe0524SFelix Kuehling 12124eacc26bSKent Russell if (!dev->gpu && (dev->node_props.simd_count > 0)) { 12135b5c4e40SEvgeny Pinchuk dev->gpu = gpu; 12145b5c4e40SEvgeny Pinchuk out_dev = dev; 1215171bc67eSHarish Kasiviswanathan 1216171bc67eSHarish Kasiviswanathan list_for_each_entry(mem, &dev->mem_props, list) 1217171bc67eSHarish Kasiviswanathan mem->gpu = dev->gpu; 1218171bc67eSHarish Kasiviswanathan list_for_each_entry(cache, &dev->cache_props, list) 1219171bc67eSHarish Kasiviswanathan cache->gpu = dev->gpu; 1220171bc67eSHarish Kasiviswanathan list_for_each_entry(iolink, &dev->io_link_props, list) 1221171bc67eSHarish Kasiviswanathan iolink->gpu = dev->gpu; 12220f28cca8SRamesh Errabolu list_for_each_entry(p2plink, &dev->p2p_link_props, list) 12230f28cca8SRamesh Errabolu p2plink->gpu = dev->gpu; 12245b5c4e40SEvgeny Pinchuk break; 12255b5c4e40SEvgeny Pinchuk } 1226b8fe0524SFelix Kuehling } 12275b5c4e40SEvgeny Pinchuk return out_dev; 12285b5c4e40SEvgeny Pinchuk } 12295b5c4e40SEvgeny Pinchuk 12305b5c4e40SEvgeny Pinchuk static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival) 12315b5c4e40SEvgeny Pinchuk { 12325b5c4e40SEvgeny Pinchuk /* 12335b5c4e40SEvgeny Pinchuk * TODO: Generate an event for thunk about the arrival/removal 12345b5c4e40SEvgeny Pinchuk * of the GPU 12355b5c4e40SEvgeny Pinchuk */ 12365b5c4e40SEvgeny Pinchuk } 12375b5c4e40SEvgeny Pinchuk 12383a87177eSHarish Kasiviswanathan /* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info, 12393a87177eSHarish Kasiviswanathan * patch this after CRAT parsing. 
12403a87177eSHarish Kasiviswanathan */ 12413a87177eSHarish Kasiviswanathan static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) 12423a87177eSHarish Kasiviswanathan { 12433a87177eSHarish Kasiviswanathan struct kfd_mem_properties *mem; 12443a87177eSHarish Kasiviswanathan struct kfd_local_mem_info local_mem_info; 12453a87177eSHarish Kasiviswanathan 12463a87177eSHarish Kasiviswanathan if (!dev) 12473a87177eSHarish Kasiviswanathan return; 12483a87177eSHarish Kasiviswanathan 12493a87177eSHarish Kasiviswanathan /* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with 12503a87177eSHarish Kasiviswanathan * single bank of VRAM local memory. 12513a87177eSHarish Kasiviswanathan * for dGPUs - VCRAT reports only one bank of Local Memory 12523a87177eSHarish Kasiviswanathan * for APUs - If CRAT from ACPI reports more than one bank, then 12533a87177eSHarish Kasiviswanathan * all the banks will report the same mem_clk_max information 12543a87177eSHarish Kasiviswanathan */ 1255574c4183SGraham Sider amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info); 12563a87177eSHarish Kasiviswanathan 12573a87177eSHarish Kasiviswanathan list_for_each_entry(mem, &dev->mem_props, list) 12583a87177eSHarish Kasiviswanathan mem->mem_clk_max = local_mem_info.mem_clk_max; 12593a87177eSHarish Kasiviswanathan } 12603a87177eSHarish Kasiviswanathan 1261bdd24657SJonathan Kim static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev, 1262bdd24657SJonathan Kim struct kfd_topology_device *target_gpu_dev, 1263bdd24657SJonathan Kim struct kfd_iolink_properties *link) 12643a87177eSHarish Kasiviswanathan { 1265bdd24657SJonathan Kim /* xgmi always supports atomics between links. */ 1266bdd24657SJonathan Kim if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI) 12673a87177eSHarish Kasiviswanathan return; 12683a87177eSHarish Kasiviswanathan 1269bdd24657SJonathan Kim /* check pcie support to set cpu(dev) flags for target_gpu_dev link. 
*/ 1270bdd24657SJonathan Kim if (target_gpu_dev) { 1271bdd24657SJonathan Kim uint32_t cap; 1272bdd24657SJonathan Kim 1273d69a3b76SMukul Joshi pcie_capability_read_dword(target_gpu_dev->gpu->adev->pdev, 1274d35f00d8SEric Huang PCI_EXP_DEVCAP2, &cap); 1275d35f00d8SEric Huang 1276d35f00d8SEric Huang if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | 1277d35f00d8SEric Huang PCI_EXP_DEVCAP2_ATOMIC_COMP64))) 1278bdd24657SJonathan Kim link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | 12793a87177eSHarish Kasiviswanathan CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; 1280bdd24657SJonathan Kim /* set gpu (dev) flags. */ 1281bdd24657SJonathan Kim } else { 12828dc1db31SMukul Joshi if (!dev->gpu->kfd->pci_atomic_requested || 12837eb0502aSGraham Sider dev->gpu->adev->asic_type == CHIP_HAWAII) 1284bdd24657SJonathan Kim link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | 1285d35f00d8SEric Huang CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; 1286deb68983SJonathan Kim } 1287bdd24657SJonathan Kim } 1288bdd24657SJonathan Kim 1289c9cfbf7fSEric Huang static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev, 1290c9cfbf7fSEric Huang struct kfd_iolink_properties *outbound_link, 1291c9cfbf7fSEric Huang struct kfd_iolink_properties *inbound_link) 1292c9cfbf7fSEric Huang { 1293c9cfbf7fSEric Huang /* CPU -> GPU with PCIe */ 1294c9cfbf7fSEric Huang if (!to_dev->gpu && 1295c9cfbf7fSEric Huang inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) 1296c9cfbf7fSEric Huang inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; 1297c9cfbf7fSEric Huang 1298c9cfbf7fSEric Huang if (to_dev->gpu) { 1299c9cfbf7fSEric Huang /* GPU <-> GPU with PCIe and 1300c9cfbf7fSEric Huang * Vega20 with XGMI 1301c9cfbf7fSEric Huang */ 1302c9cfbf7fSEric Huang if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS || 1303c9cfbf7fSEric Huang (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && 1304046e674bSGraham Sider KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) { 1305c9cfbf7fSEric Huang 
outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; 1306c9cfbf7fSEric Huang inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; 1307c9cfbf7fSEric Huang } 1308c9cfbf7fSEric Huang } 1309c9cfbf7fSEric Huang } 1310c9cfbf7fSEric Huang 1311bdd24657SJonathan Kim static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) 1312bdd24657SJonathan Kim { 1313bdd24657SJonathan Kim struct kfd_iolink_properties *link, *inbound_link; 1314bdd24657SJonathan Kim struct kfd_topology_device *peer_dev; 1315bdd24657SJonathan Kim 1316bdd24657SJonathan Kim if (!dev || !dev->gpu) 1317bdd24657SJonathan Kim return; 1318d35f00d8SEric Huang 1319d35f00d8SEric Huang /* GPU only creates direct links so apply flags setting to all */ 1320d35f00d8SEric Huang list_for_each_entry(link, &dev->io_link_props, list) { 1321bdd24657SJonathan Kim link->flags = CRAT_IOLINK_FLAGS_ENABLED; 1322bdd24657SJonathan Kim kfd_set_iolink_no_atomics(dev, NULL, link); 1323bdd24657SJonathan Kim peer_dev = kfd_topology_device_by_proximity_domain( 1324d35f00d8SEric Huang link->node_to); 1325bdd24657SJonathan Kim 1326bdd24657SJonathan Kim if (!peer_dev) 1327bdd24657SJonathan Kim continue; 1328bdd24657SJonathan Kim 1329a0af5dbdSJonathan Kim /* Include the CPU peer in GPU hive if connected over xGMI. */ 1330b2ef2fdfSRajneesh Bhardwaj if (!peer_dev->gpu && 13311698e200SJonathan Kim link->iolink_type == CRAT_IOLINK_TYPE_XGMI) { 13321698e200SJonathan Kim /* 13331698e200SJonathan Kim * If the GPU is not part of a GPU hive, use its pci 13341698e200SJonathan Kim * device location as the hive ID to bind with the CPU. 
13351698e200SJonathan Kim */ 13361698e200SJonathan Kim if (!dev->node_props.hive_id) 13371698e200SJonathan Kim dev->node_props.hive_id = pci_dev_id(dev->gpu->adev->pdev); 1338a0af5dbdSJonathan Kim peer_dev->node_props.hive_id = dev->node_props.hive_id; 13391698e200SJonathan Kim } 1340a0af5dbdSJonathan Kim 1341bdd24657SJonathan Kim list_for_each_entry(inbound_link, &peer_dev->io_link_props, 1342bdd24657SJonathan Kim list) { 1343bdd24657SJonathan Kim if (inbound_link->node_to != link->node_from) 1344bdd24657SJonathan Kim continue; 1345bdd24657SJonathan Kim 1346bdd24657SJonathan Kim inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; 1347bdd24657SJonathan Kim kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link); 1348c9cfbf7fSEric Huang kfd_set_iolink_non_coherent(peer_dev, link, inbound_link); 1349d35f00d8SEric Huang } 1350d35f00d8SEric Huang } 13510f28cca8SRamesh Errabolu 13520f28cca8SRamesh Errabolu /* Create indirect links so apply flags setting to all */ 13530f28cca8SRamesh Errabolu list_for_each_entry(link, &dev->p2p_link_props, list) { 13540f28cca8SRamesh Errabolu link->flags = CRAT_IOLINK_FLAGS_ENABLED; 13550f28cca8SRamesh Errabolu kfd_set_iolink_no_atomics(dev, NULL, link); 13560f28cca8SRamesh Errabolu peer_dev = kfd_topology_device_by_proximity_domain( 13570f28cca8SRamesh Errabolu link->node_to); 13580f28cca8SRamesh Errabolu 13590f28cca8SRamesh Errabolu if (!peer_dev) 13600f28cca8SRamesh Errabolu continue; 13610f28cca8SRamesh Errabolu 13620f28cca8SRamesh Errabolu list_for_each_entry(inbound_link, &peer_dev->p2p_link_props, 13630f28cca8SRamesh Errabolu list) { 13640f28cca8SRamesh Errabolu if (inbound_link->node_to != link->node_from) 13650f28cca8SRamesh Errabolu continue; 13660f28cca8SRamesh Errabolu 13670f28cca8SRamesh Errabolu inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; 13680f28cca8SRamesh Errabolu kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link); 13690f28cca8SRamesh Errabolu kfd_set_iolink_non_coherent(peer_dev, link, inbound_link); 
13700f28cca8SRamesh Errabolu } 13710f28cca8SRamesh Errabolu } 13720f28cca8SRamesh Errabolu } 13730f28cca8SRamesh Errabolu 13740f28cca8SRamesh Errabolu static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev, 13750f28cca8SRamesh Errabolu struct kfd_iolink_properties *p2plink) 13760f28cca8SRamesh Errabolu { 13770f28cca8SRamesh Errabolu int ret; 13780f28cca8SRamesh Errabolu 13790f28cca8SRamesh Errabolu p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); 13800f28cca8SRamesh Errabolu if (!p2plink->kobj) 13810f28cca8SRamesh Errabolu return -ENOMEM; 13820f28cca8SRamesh Errabolu 13830f28cca8SRamesh Errabolu ret = kobject_init_and_add(p2plink->kobj, &iolink_type, 13840f28cca8SRamesh Errabolu dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1); 13850f28cca8SRamesh Errabolu if (ret < 0) { 13860f28cca8SRamesh Errabolu kobject_put(p2plink->kobj); 13870f28cca8SRamesh Errabolu return ret; 13880f28cca8SRamesh Errabolu } 13890f28cca8SRamesh Errabolu 13900f28cca8SRamesh Errabolu p2plink->attr.name = "properties"; 13910f28cca8SRamesh Errabolu p2plink->attr.mode = KFD_SYSFS_FILE_MODE; 13920f28cca8SRamesh Errabolu sysfs_attr_init(&p2plink->attr); 13930f28cca8SRamesh Errabolu ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); 13940f28cca8SRamesh Errabolu if (ret < 0) 13950f28cca8SRamesh Errabolu return ret; 13960f28cca8SRamesh Errabolu 13970f28cca8SRamesh Errabolu return 0; 13980f28cca8SRamesh Errabolu } 13990f28cca8SRamesh Errabolu 14000f28cca8SRamesh Errabolu static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node) 14010f28cca8SRamesh Errabolu { 14027d50b92dSDan Carpenter struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link; 14030f28cca8SRamesh Errabolu struct kfd_iolink_properties *props = NULL, *props2 = NULL; 14040f28cca8SRamesh Errabolu struct kfd_topology_device *cpu_dev; 14050f28cca8SRamesh Errabolu int ret = 0; 14060f28cca8SRamesh Errabolu int i, num_cpu; 14070f28cca8SRamesh Errabolu 14080f28cca8SRamesh 
Errabolu num_cpu = 0; 14090f28cca8SRamesh Errabolu list_for_each_entry(cpu_dev, &topology_device_list, list) { 14100f28cca8SRamesh Errabolu if (cpu_dev->gpu) 14110f28cca8SRamesh Errabolu break; 14120f28cca8SRamesh Errabolu num_cpu++; 14130f28cca8SRamesh Errabolu } 14140f28cca8SRamesh Errabolu 14150f28cca8SRamesh Errabolu gpu_link = list_first_entry(&kdev->io_link_props, 14160f28cca8SRamesh Errabolu struct kfd_iolink_properties, list); 14170f28cca8SRamesh Errabolu if (!gpu_link) 14180f28cca8SRamesh Errabolu return -ENOMEM; 14190f28cca8SRamesh Errabolu 14200f28cca8SRamesh Errabolu for (i = 0; i < num_cpu; i++) { 14210f28cca8SRamesh Errabolu /* CPU <--> GPU */ 14220f28cca8SRamesh Errabolu if (gpu_link->node_to == i) 14230f28cca8SRamesh Errabolu continue; 14240f28cca8SRamesh Errabolu 14250f28cca8SRamesh Errabolu /* find CPU <--> CPU links */ 14267d50b92dSDan Carpenter cpu_link = NULL; 14270f28cca8SRamesh Errabolu cpu_dev = kfd_topology_device_by_proximity_domain(i); 14280f28cca8SRamesh Errabolu if (cpu_dev) { 14297d50b92dSDan Carpenter list_for_each_entry(tmp_link, 14300f28cca8SRamesh Errabolu &cpu_dev->io_link_props, list) { 14317d50b92dSDan Carpenter if (tmp_link->node_to == gpu_link->node_to) { 14327d50b92dSDan Carpenter cpu_link = tmp_link; 14330f28cca8SRamesh Errabolu break; 14340f28cca8SRamesh Errabolu } 14350f28cca8SRamesh Errabolu } 14367d50b92dSDan Carpenter } 14370f28cca8SRamesh Errabolu 14387d50b92dSDan Carpenter if (!cpu_link) 14390f28cca8SRamesh Errabolu return -ENOMEM; 14400f28cca8SRamesh Errabolu 14410f28cca8SRamesh Errabolu /* CPU <--> CPU <--> GPU, GPU node*/ 14420f28cca8SRamesh Errabolu props = kfd_alloc_struct(props); 14430f28cca8SRamesh Errabolu if (!props) 14440f28cca8SRamesh Errabolu return -ENOMEM; 14450f28cca8SRamesh Errabolu 14460f28cca8SRamesh Errabolu memcpy(props, gpu_link, sizeof(struct kfd_iolink_properties)); 14470f28cca8SRamesh Errabolu props->weight = gpu_link->weight + cpu_link->weight; 14480f28cca8SRamesh Errabolu props->min_latency = 
gpu_link->min_latency + cpu_link->min_latency; 14490f28cca8SRamesh Errabolu props->max_latency = gpu_link->max_latency + cpu_link->max_latency; 14500f28cca8SRamesh Errabolu props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth); 14510f28cca8SRamesh Errabolu props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth); 14520f28cca8SRamesh Errabolu 14530f28cca8SRamesh Errabolu props->node_from = gpu_node; 14540f28cca8SRamesh Errabolu props->node_to = i; 14550f28cca8SRamesh Errabolu kdev->node_props.p2p_links_count++; 14560f28cca8SRamesh Errabolu list_add_tail(&props->list, &kdev->p2p_link_props); 14570f28cca8SRamesh Errabolu ret = kfd_build_p2p_node_entry(kdev, props); 14580f28cca8SRamesh Errabolu if (ret < 0) 14590f28cca8SRamesh Errabolu return ret; 14600f28cca8SRamesh Errabolu 14610f28cca8SRamesh Errabolu /* for small Bar, no CPU --> GPU in-direct links */ 14620f28cca8SRamesh Errabolu if (kfd_dev_is_large_bar(kdev->gpu)) { 14630f28cca8SRamesh Errabolu /* CPU <--> CPU <--> GPU, CPU node*/ 14640f28cca8SRamesh Errabolu props2 = kfd_alloc_struct(props2); 14650f28cca8SRamesh Errabolu if (!props2) 14660f28cca8SRamesh Errabolu return -ENOMEM; 14670f28cca8SRamesh Errabolu 14680f28cca8SRamesh Errabolu memcpy(props2, props, sizeof(struct kfd_iolink_properties)); 14690f28cca8SRamesh Errabolu props2->node_from = i; 14700f28cca8SRamesh Errabolu props2->node_to = gpu_node; 14710f28cca8SRamesh Errabolu props2->kobj = NULL; 14720f28cca8SRamesh Errabolu cpu_dev->node_props.p2p_links_count++; 14730f28cca8SRamesh Errabolu list_add_tail(&props2->list, &cpu_dev->p2p_link_props); 14740f28cca8SRamesh Errabolu ret = kfd_build_p2p_node_entry(cpu_dev, props2); 14750f28cca8SRamesh Errabolu if (ret < 0) 14760f28cca8SRamesh Errabolu return ret; 14770f28cca8SRamesh Errabolu } 14780f28cca8SRamesh Errabolu } 14790f28cca8SRamesh Errabolu return ret; 14800f28cca8SRamesh Errabolu } 14810f28cca8SRamesh Errabolu 14820f28cca8SRamesh Errabolu #if 
defined(CONFIG_HSA_AMD_P2P) 14830f28cca8SRamesh Errabolu static int kfd_add_peer_prop(struct kfd_topology_device *kdev, 14840f28cca8SRamesh Errabolu struct kfd_topology_device *peer, int from, int to) 14850f28cca8SRamesh Errabolu { 14860f28cca8SRamesh Errabolu struct kfd_iolink_properties *props = NULL; 14870f28cca8SRamesh Errabolu struct kfd_iolink_properties *iolink1, *iolink2, *iolink3; 14880f28cca8SRamesh Errabolu struct kfd_topology_device *cpu_dev; 14890f28cca8SRamesh Errabolu int ret = 0; 14900f28cca8SRamesh Errabolu 14910f28cca8SRamesh Errabolu if (!amdgpu_device_is_peer_accessible( 14920f28cca8SRamesh Errabolu kdev->gpu->adev, 14930f28cca8SRamesh Errabolu peer->gpu->adev)) 14940f28cca8SRamesh Errabolu return ret; 14950f28cca8SRamesh Errabolu 14960f28cca8SRamesh Errabolu iolink1 = list_first_entry(&kdev->io_link_props, 14970f28cca8SRamesh Errabolu struct kfd_iolink_properties, list); 14980f28cca8SRamesh Errabolu if (!iolink1) 14990f28cca8SRamesh Errabolu return -ENOMEM; 15000f28cca8SRamesh Errabolu 15010f28cca8SRamesh Errabolu iolink2 = list_first_entry(&peer->io_link_props, 15020f28cca8SRamesh Errabolu struct kfd_iolink_properties, list); 15030f28cca8SRamesh Errabolu if (!iolink2) 15040f28cca8SRamesh Errabolu return -ENOMEM; 15050f28cca8SRamesh Errabolu 15060f28cca8SRamesh Errabolu props = kfd_alloc_struct(props); 15070f28cca8SRamesh Errabolu if (!props) 15080f28cca8SRamesh Errabolu return -ENOMEM; 15090f28cca8SRamesh Errabolu 15100f28cca8SRamesh Errabolu memcpy(props, iolink1, sizeof(struct kfd_iolink_properties)); 15110f28cca8SRamesh Errabolu 15120f28cca8SRamesh Errabolu props->weight = iolink1->weight + iolink2->weight; 15130f28cca8SRamesh Errabolu props->min_latency = iolink1->min_latency + iolink2->min_latency; 15140f28cca8SRamesh Errabolu props->max_latency = iolink1->max_latency + iolink2->max_latency; 15150f28cca8SRamesh Errabolu props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth); 15160f28cca8SRamesh Errabolu 
props->max_bandwidth = min(iolink2->max_bandwidth, iolink2->max_bandwidth); 15170f28cca8SRamesh Errabolu 15180f28cca8SRamesh Errabolu if (iolink1->node_to != iolink2->node_to) { 15190f28cca8SRamesh Errabolu /* CPU->CPU link*/ 15200f28cca8SRamesh Errabolu cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to); 15210f28cca8SRamesh Errabolu if (cpu_dev) { 15220f28cca8SRamesh Errabolu list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) 15230f28cca8SRamesh Errabolu if (iolink3->node_to == iolink2->node_to) 15240f28cca8SRamesh Errabolu break; 15250f28cca8SRamesh Errabolu 15260f28cca8SRamesh Errabolu props->weight += iolink3->weight; 15270f28cca8SRamesh Errabolu props->min_latency += iolink3->min_latency; 15280f28cca8SRamesh Errabolu props->max_latency += iolink3->max_latency; 15290f28cca8SRamesh Errabolu props->min_bandwidth = min(props->min_bandwidth, 15300f28cca8SRamesh Errabolu iolink3->min_bandwidth); 15310f28cca8SRamesh Errabolu props->max_bandwidth = min(props->max_bandwidth, 15320f28cca8SRamesh Errabolu iolink3->max_bandwidth); 15330f28cca8SRamesh Errabolu } else { 15340f28cca8SRamesh Errabolu WARN(1, "CPU node not found"); 15350f28cca8SRamesh Errabolu } 15360f28cca8SRamesh Errabolu } 15370f28cca8SRamesh Errabolu 15380f28cca8SRamesh Errabolu props->node_from = from; 15390f28cca8SRamesh Errabolu props->node_to = to; 15400f28cca8SRamesh Errabolu peer->node_props.p2p_links_count++; 15410f28cca8SRamesh Errabolu list_add_tail(&props->list, &peer->p2p_link_props); 15420f28cca8SRamesh Errabolu ret = kfd_build_p2p_node_entry(peer, props); 15430f28cca8SRamesh Errabolu 15440f28cca8SRamesh Errabolu return ret; 15450f28cca8SRamesh Errabolu } 15460f28cca8SRamesh Errabolu #endif 15470f28cca8SRamesh Errabolu 15480f28cca8SRamesh Errabolu static int kfd_dev_create_p2p_links(void) 15490f28cca8SRamesh Errabolu { 15500f28cca8SRamesh Errabolu struct kfd_topology_device *dev; 15510f28cca8SRamesh Errabolu struct kfd_topology_device *new_dev; 1552914da384SAlex 
Deucher #if defined(CONFIG_HSA_AMD_P2P) 1553914da384SAlex Deucher uint32_t i; 1554914da384SAlex Deucher #endif 1555914da384SAlex Deucher uint32_t k; 15560f28cca8SRamesh Errabolu int ret = 0; 15570f28cca8SRamesh Errabolu 15580f28cca8SRamesh Errabolu k = 0; 15590f28cca8SRamesh Errabolu list_for_each_entry(dev, &topology_device_list, list) 15600f28cca8SRamesh Errabolu k++; 15610f28cca8SRamesh Errabolu if (k < 2) 15620f28cca8SRamesh Errabolu return 0; 15630f28cca8SRamesh Errabolu 15640f28cca8SRamesh Errabolu new_dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list); 15650f28cca8SRamesh Errabolu if (WARN_ON(!new_dev->gpu)) 15660f28cca8SRamesh Errabolu return 0; 15670f28cca8SRamesh Errabolu 15680f28cca8SRamesh Errabolu k--; 15690f28cca8SRamesh Errabolu 15700f28cca8SRamesh Errabolu /* create in-direct links */ 15710f28cca8SRamesh Errabolu ret = kfd_create_indirect_link_prop(new_dev, k); 15720f28cca8SRamesh Errabolu if (ret < 0) 15730f28cca8SRamesh Errabolu goto out; 15740f28cca8SRamesh Errabolu 15750f28cca8SRamesh Errabolu /* create p2p links */ 15760f28cca8SRamesh Errabolu #if defined(CONFIG_HSA_AMD_P2P) 1577914da384SAlex Deucher i = 0; 15780f28cca8SRamesh Errabolu list_for_each_entry(dev, &topology_device_list, list) { 15790f28cca8SRamesh Errabolu if (dev == new_dev) 15800f28cca8SRamesh Errabolu break; 15810f28cca8SRamesh Errabolu if (!dev->gpu || !dev->gpu->adev || 15828dc1db31SMukul Joshi (dev->gpu->kfd->hive_id && 15838dc1db31SMukul Joshi dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id)) 15840f28cca8SRamesh Errabolu goto next; 15850f28cca8SRamesh Errabolu 15860f28cca8SRamesh Errabolu /* check if node(s) is/are peer accessible in one direction or bi-direction */ 15870f28cca8SRamesh Errabolu ret = kfd_add_peer_prop(new_dev, dev, i, k); 15880f28cca8SRamesh Errabolu if (ret < 0) 15890f28cca8SRamesh Errabolu goto out; 15900f28cca8SRamesh Errabolu 15910f28cca8SRamesh Errabolu ret = kfd_add_peer_prop(dev, new_dev, k, i); 15920f28cca8SRamesh 
Errabolu if (ret < 0) 15930f28cca8SRamesh Errabolu goto out; 15940f28cca8SRamesh Errabolu next: 15950f28cca8SRamesh Errabolu i++; 15960f28cca8SRamesh Errabolu } 15970f28cca8SRamesh Errabolu #endif 15980f28cca8SRamesh Errabolu 15990f28cca8SRamesh Errabolu out: 16000f28cca8SRamesh Errabolu return ret; 16013a87177eSHarish Kasiviswanathan } 16023a87177eSHarish Kasiviswanathan 1603c0cc999fSMa Jun /* Helper function. See kfd_fill_gpu_cache_info for parameter description */ 1604c0cc999fSMa Jun static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext, 1605c0cc999fSMa Jun struct kfd_gpu_cache_info *pcache_info, 1606c0cc999fSMa Jun struct kfd_cu_info *cu_info, 1607c0cc999fSMa Jun int cu_bitmask, 1608c0cc999fSMa Jun int cache_type, unsigned int cu_processor_id, 1609c0cc999fSMa Jun int cu_block) 1610c0cc999fSMa Jun { 1611c0cc999fSMa Jun unsigned int cu_sibling_map_mask; 1612c0cc999fSMa Jun int first_active_cu; 1613c0cc999fSMa Jun struct kfd_cache_properties *pcache = NULL; 1614c0cc999fSMa Jun 1615c0cc999fSMa Jun cu_sibling_map_mask = cu_bitmask; 1616c0cc999fSMa Jun cu_sibling_map_mask >>= cu_block; 1617c0cc999fSMa Jun cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); 1618c0cc999fSMa Jun first_active_cu = ffs(cu_sibling_map_mask); 1619c0cc999fSMa Jun 1620c0cc999fSMa Jun /* CU could be inactive. In case of shared cache find the first active 1621c0cc999fSMa Jun * CU. and incase of non-shared cache check if the CU is inactive. 
If 1622c0cc999fSMa Jun * inactive active skip it 1623c0cc999fSMa Jun */ 1624c0cc999fSMa Jun if (first_active_cu) { 1625c0cc999fSMa Jun pcache = kfd_alloc_struct(pcache); 1626c0cc999fSMa Jun if (!pcache) 1627c0cc999fSMa Jun return -ENOMEM; 1628c0cc999fSMa Jun 1629c0cc999fSMa Jun memset(pcache, 0, sizeof(struct kfd_cache_properties)); 1630c0cc999fSMa Jun pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); 1631c0cc999fSMa Jun pcache->cache_level = pcache_info[cache_type].cache_level; 1632c0cc999fSMa Jun pcache->cache_size = pcache_info[cache_type].cache_size; 1633c0cc999fSMa Jun 1634c0cc999fSMa Jun if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) 1635c0cc999fSMa Jun pcache->cache_type |= HSA_CACHE_TYPE_DATA; 1636c0cc999fSMa Jun if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) 1637c0cc999fSMa Jun pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; 1638c0cc999fSMa Jun if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) 1639c0cc999fSMa Jun pcache->cache_type |= HSA_CACHE_TYPE_CPU; 1640c0cc999fSMa Jun if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) 1641c0cc999fSMa Jun pcache->cache_type |= HSA_CACHE_TYPE_HSACU; 1642c0cc999fSMa Jun 1643c0cc999fSMa Jun /* Sibling map is w.r.t processor_id_low, so shift out 1644c0cc999fSMa Jun * inactive CU 1645c0cc999fSMa Jun */ 1646c0cc999fSMa Jun cu_sibling_map_mask = 1647c0cc999fSMa Jun cu_sibling_map_mask >> (first_active_cu - 1); 1648c0cc999fSMa Jun 1649c0cc999fSMa Jun pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); 1650c0cc999fSMa Jun pcache->sibling_map[1] = 1651c0cc999fSMa Jun (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); 1652c0cc999fSMa Jun pcache->sibling_map[2] = 1653c0cc999fSMa Jun (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); 1654c0cc999fSMa Jun pcache->sibling_map[3] = 1655c0cc999fSMa Jun (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); 1656c0cc999fSMa Jun 1657c0cc999fSMa Jun pcache->sibling_map_size = 4; 1658c0cc999fSMa Jun *props_ext = 
pcache; 1659c0cc999fSMa Jun 1660c0cc999fSMa Jun return 0; 1661c0cc999fSMa Jun } 1662c0cc999fSMa Jun return 1; 1663c0cc999fSMa Jun } 1664c0cc999fSMa Jun 1665c0cc999fSMa Jun /* Helper function. See kfd_fill_gpu_cache_info for parameter description */ 1666c0cc999fSMa Jun static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, 1667c0cc999fSMa Jun struct kfd_gpu_cache_info *pcache_info, 1668c0cc999fSMa Jun struct kfd_cu_info *cu_info, 1669c0cc999fSMa Jun int cache_type, unsigned int cu_processor_id) 1670c0cc999fSMa Jun { 1671c0cc999fSMa Jun unsigned int cu_sibling_map_mask; 1672c0cc999fSMa Jun int first_active_cu; 1673c0cc999fSMa Jun int i, j, k; 1674c0cc999fSMa Jun struct kfd_cache_properties *pcache = NULL; 1675c0cc999fSMa Jun 1676c0cc999fSMa Jun cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; 1677c0cc999fSMa Jun cu_sibling_map_mask &= 1678c0cc999fSMa Jun ((1 << pcache_info[cache_type].num_cu_shared) - 1); 1679c0cc999fSMa Jun first_active_cu = ffs(cu_sibling_map_mask); 1680c0cc999fSMa Jun 1681c0cc999fSMa Jun /* CU could be inactive. In case of shared cache find the first active 1682c0cc999fSMa Jun * CU. and incase of non-shared cache check if the CU is inactive. 
If 1683c0cc999fSMa Jun * inactive active skip it 1684c0cc999fSMa Jun */ 1685c0cc999fSMa Jun if (first_active_cu) { 1686c0cc999fSMa Jun pcache = kfd_alloc_struct(pcache); 1687c0cc999fSMa Jun if (!pcache) 1688c0cc999fSMa Jun return -ENOMEM; 1689c0cc999fSMa Jun 1690c0cc999fSMa Jun memset(pcache, 0, sizeof(struct kfd_cache_properties)); 1691c0cc999fSMa Jun pcache->processor_id_low = cu_processor_id 1692c0cc999fSMa Jun + (first_active_cu - 1); 1693c0cc999fSMa Jun pcache->cache_level = pcache_info[cache_type].cache_level; 1694c0cc999fSMa Jun pcache->cache_size = pcache_info[cache_type].cache_size; 1695c0cc999fSMa Jun 1696c0cc999fSMa Jun if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) 1697c0cc999fSMa Jun pcache->cache_type |= HSA_CACHE_TYPE_DATA; 1698c0cc999fSMa Jun if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) 1699c0cc999fSMa Jun pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; 1700c0cc999fSMa Jun if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) 1701c0cc999fSMa Jun pcache->cache_type |= HSA_CACHE_TYPE_CPU; 1702c0cc999fSMa Jun if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) 1703c0cc999fSMa Jun pcache->cache_type |= HSA_CACHE_TYPE_HSACU; 1704c0cc999fSMa Jun 1705c0cc999fSMa Jun /* Sibling map is w.r.t processor_id_low, so shift out 1706c0cc999fSMa Jun * inactive CU 1707c0cc999fSMa Jun */ 1708c0cc999fSMa Jun cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); 1709c0cc999fSMa Jun k = 0; 1710c0cc999fSMa Jun 1711c0cc999fSMa Jun for (i = 0; i < cu_info->num_shader_engines; i++) { 1712c0cc999fSMa Jun for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { 1713c0cc999fSMa Jun pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); 1714c0cc999fSMa Jun pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); 1715c0cc999fSMa Jun pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); 1716c0cc999fSMa Jun pcache->sibling_map[k+3] = 
(uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); 1717c0cc999fSMa Jun k += 4; 1718c0cc999fSMa Jun 1719c0cc999fSMa Jun cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4]; 1720c0cc999fSMa Jun cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); 1721c0cc999fSMa Jun } 1722c0cc999fSMa Jun } 1723c0cc999fSMa Jun pcache->sibling_map_size = k; 1724c0cc999fSMa Jun *props_ext = pcache; 1725c0cc999fSMa Jun return 0; 1726c0cc999fSMa Jun } 1727c0cc999fSMa Jun return 1; 1728c0cc999fSMa Jun } 1729c0cc999fSMa Jun 1730c0cc999fSMa Jun #define KFD_MAX_CACHE_TYPES 6 1731c0cc999fSMa Jun 1732c0cc999fSMa Jun /* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info 1733c0cc999fSMa Jun * tables 1734c0cc999fSMa Jun */ 17358dc1db31SMukul Joshi static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev) 1736c0cc999fSMa Jun { 1737c0cc999fSMa Jun struct kfd_gpu_cache_info *pcache_info = NULL; 1738c0cc999fSMa Jun int i, j, k; 1739c0cc999fSMa Jun int ct = 0; 1740c0cc999fSMa Jun unsigned int cu_processor_id; 1741c0cc999fSMa Jun int ret; 1742c0cc999fSMa Jun unsigned int num_cu_shared; 1743c0cc999fSMa Jun struct kfd_cu_info cu_info; 1744c0cc999fSMa Jun struct kfd_cu_info *pcu_info; 1745c0cc999fSMa Jun int gpu_processor_id; 1746c0cc999fSMa Jun struct kfd_cache_properties *props_ext; 1747c0cc999fSMa Jun int num_of_entries = 0; 1748c0cc999fSMa Jun int num_of_cache_types = 0; 1749c0cc999fSMa Jun struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; 1750c0cc999fSMa Jun 1751c0cc999fSMa Jun amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info); 1752c0cc999fSMa Jun pcu_info = &cu_info; 1753c0cc999fSMa Jun 1754c0cc999fSMa Jun gpu_processor_id = dev->node_props.simd_id_base; 1755c0cc999fSMa Jun 1756c0cc999fSMa Jun pcache_info = cache_info; 1757c0cc999fSMa Jun num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info); 1758c0cc999fSMa Jun if (!num_of_cache_types) { 1759c0cc999fSMa Jun pr_warn("no cache info found\n"); 
		/* NOTE(review): this chunk begins mid-function - the opening of
		 * the cache-fill routine (declarations of pcache_info, pcu_info,
		 * props_ext, ct, i, j, k, ret, num_of_entries, num_cu_shared,
		 * gpu_processor_id and cu_processor_id) is above this view.
		 */
		return;
	}

	/* For each type of cache listed in the kfd_gpu_cache_info table,
	 * go through all available Compute Units.
	 * The [i,j,k] loop will
	 *		if kfd_gpu_cache_info.num_cu_shared = 1
	 *			will parse through all available CU
	 *		If (kfd_gpu_cache_info.num_cu_shared != 1)
	 *			then it will consider only one CU from
	 *			the shared unit
	 */
	for (ct = 0; ct < num_of_cache_types; ct++) {
		/* Processor IDs restart from the GPU base for each cache type */
		cu_processor_id = gpu_processor_id;
		if (pcache_info[ct].cache_level == 1) {
			/* L1 caches: walk shader-engine x shader-array x CU */
			for (i = 0; i < pcu_info->num_shader_engines; i++) {
				for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
					for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {

						/* ret == 0: entry filled and queued below;
						 * ret > 0: presumably "skip, no entry" - TODO
						 * confirm against fill_in_l1_pcache();
						 * ret < 0: error, abort this CU walk.
						 */
						ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
								pcu_info->cu_bitmap[i % 4][j + i / 4], ct,
								cu_processor_id, k);

						if (ret < 0)
							break;

						if (!ret) {
							num_of_entries++;
							list_add_tail(&props_ext->list, &dev->cache_props);
						}

						/* Move to next CU block */
						num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
							pcu_info->num_cu_per_sh) ?
							pcache_info[ct].num_cu_shared :
							(pcu_info->num_cu_per_sh - k);
						cu_processor_id += num_cu_shared;
					}
				}
			}
		} else {
			/* L2/L3: a single entry per cache type */
			ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
					pcu_info, ct, cu_processor_id);

			if (ret < 0)
				break;

			if (!ret) {
				num_of_entries++;
				list_add_tail(&props_ext->list, &dev->cache_props);
			}
		}
	}
	dev->node_props.caches_count += num_of_entries;
	pr_debug("Added [%d] GPU cache entries\n", num_of_entries);
}

/* kfd_topology_add_device_locked - Build and register a topology node for @gpu.
 *
 * Creates a virtual CRAT image for the GPU, parses it into a temporary
 * list, merges that list into the global topology_device_list, attaches
 * @gpu to the resulting node via kfd_assign_gpu() (returned in *dev),
 * fills GPU cache info and refreshes the sysfs tree.
 *
 * The global proximity-domain counter is reserved up front and rolled
 * back if CRAT creation or parsing fails.
 *
 * Context: caller must hold topology_lock for writing (see
 * kfd_topology_add_device()).
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id,
					  struct kfd_topology_device **dev)
{
	int proximity_domain = ++topology_crat_proximity_domain;
	struct list_head temp_topology_device_list;
	void *crat_image = NULL;
	size_t image_size = 0;
	int res;

	res = kfd_create_crat_image_virtual(&crat_image, &image_size,
					    COMPUTE_UNIT_GPU, gpu,
					    proximity_domain);
	if (res) {
		pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
		       gpu_id);
		topology_crat_proximity_domain--;	/* undo reservation */
		goto err;
	}

	INIT_LIST_HEAD(&temp_topology_device_list);

	res = kfd_parse_crat_table(crat_image,
				   &temp_topology_device_list,
				   proximity_domain);
	if (res) {
		pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
		       gpu_id);
		topology_crat_proximity_domain--;	/* undo reservation */
		goto err;
	}

	kfd_topology_update_device_list(&temp_topology_device_list,
					&topology_device_list);

	/* The freshly parsed node must be able to take this GPU now */
	*dev = kfd_assign_gpu(gpu);
	if (WARN_ON(!*dev)) {
		res = -ENODEV;
		goto err;
	}

	/* Fill the cache affinity information here for the GPUs
	 * using VCRAT
	 */
	kfd_fill_cache_non_crat_info(*dev, gpu);

	/* Update the SYSFS tree, since we added another topology
	 * device
	 */
	res = kfd_topology_update_sysfs();
	if (!res)
		sys_props.generation_count++;
	else
		pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
		       gpu_id, res);

err:
	/* crat_image may still be NULL here if creation failed above */
	kfd_destroy_crat_image(crat_image);
	return res;
}

/* kfd_topology_add_device - Add @gpu to the KFD topology (continues past
 * this chunk boundary). Return: 0 on success, negative errno otherwise.
 */
int kfd_topology_add_device(struct kfd_node *gpu)
{
	uint32_t gpu_id;
	struct kfd_topology_device *dev;
	struct kfd_cu_info cu_info;
	int res = 0;
	int i;
	const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];

	gpu_id = kfd_generate_gpu_id(gpu);
	pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);

	/* Check to see if this gpu device exists in the topology_device_list.
	 * If so, assign the gpu to that device,
	 * else create a Virtual CRAT for this gpu device and then parse that
	 * CRAT to create a new topology device.
 Once created assign the gpu to
	 * that topology device
	 */
	down_write(&topology_lock);
	dev = kfd_assign_gpu(gpu);
	if (!dev)
		res = kfd_topology_add_device_locked(gpu, gpu_id, &dev);
	up_write(&topology_lock);
	if (res)
		return res;

	dev->gpu_id = gpu_id;
	gpu->id = gpu_id;

	kfd_dev_create_p2p_links();

	/* TODO: Move the following lines to function
	 *	kfd_add_non_crat_information
	 */

	/* Fill-in additional information that is not available in CRAT but
	 * needed for the topology
	 */

	amdgpu_amdkfd_get_cu_info(dev->gpu->adev, &cu_info);

	/* Lower-case copy of the ASIC name; always NUL-terminated */
	for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) {
		dev->node_props.name[i] = __tolower(asic_name[i]);
		if (asic_name[i] == '\0')
			break;
	}
	dev->node_props.name[i] = '\0';

	dev->node_props.simd_arrays_per_engine =
		cu_info.num_shader_arrays_per_engine;

	dev->node_props.gfx_target_version =
			gpu->kfd->device_info.gfx_target_version;
	dev->node_props.vendor_id = gpu->adev->pdev->vendor;
	dev->node_props.device_id = gpu->adev->pdev->device;
	dev->node_props.capability |=
		((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) &
			HSA_CAP_ASIC_REVISION_MASK);

	dev->node_props.location_id = pci_dev_id(gpu->adev->pdev);
	/* GFX 9.4.3: fold dev->gpu->node_id into location_id as well */
	if (KFD_GC_VERSION(dev->gpu->kfd) == IP_VERSION(9, 4, 3))
		dev->node_props.location_id |= dev->gpu->node_id;

	dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus);
	dev->node_props.max_engine_clk_fcompute =
		amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev);
	dev->node_props.max_engine_clk_ccompute =
		cpufreq_quick_get_max(0) / 1000;	/* cpufreq is in kHz -> MHz */
	dev->node_props.drm_render_minor =
		gpu->kfd->shared_resources.drm_render_minor;

	dev->node_props.hive_id = gpu->kfd->hive_id;
	dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu);
	dev->node_props.num_sdma_xgmi_engines =
		kfd_get_num_xgmi_sdma_engines(gpu);
	/* Report only usable SDMA queues (reserved ones excluded) */
	dev->node_props.num_sdma_queues_per_engine =
		gpu->kfd->device_info.num_sdma_queues_per_engine -
		gpu->kfd->device_info.num_reserved_sdma_queues_per_engine;
	/* GWS is only exposed when present and HW scheduling is in use */
	dev->node_props.num_gws = (dev->gpu->gws &&
		dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
		dev->gpu->adev->gds.gws_size : 0;
	dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);

	kfd_fill_mem_clk_max_info(dev);
	kfd_fill_iolink_non_crat_info(dev);

	/* Doorbell type capability depends on the ASIC generation */
	switch (dev->gpu->adev->asic_type) {
	case CHIP_KAVERI:
	case CHIP_HAWAII:
	case CHIP_TONGA:
		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 <<
			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
		break;
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		pr_debug("Adding doorbell packet type capability\n");
		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
		break;
	default:
		if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 0, 1))
			dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
				HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
				HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
		else
			WARN(1, "Unexpected ASIC family %u",
			     dev->gpu->adev->asic_type);
	}

	/*
	 * Overwrite ATS capability according to needs_iommu_device to fix
	 * potential missing corresponding bit in CRAT of BIOS.
	 */
	if (dev->gpu->kfd->use_iommu_v2)
		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
	else
		dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;

	/* Fix errors in CZ CRAT.
	 * simd_count: Carrizo CRAT reports wrong simd_count, probably
	 *		because it doesn't consider masked out CUs
	 * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
	 */
	if (dev->gpu->adev->asic_type == CHIP_CARRIZO) {
		dev->node_props.simd_count =
			cu_info.simd_per_cu * cu_info.cu_active_number;
		dev->node_props.max_waves_per_simd = 10;
	}

	/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
	dev->node_props.capability |=
		((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
		HSA_CAP_SRAM_EDCSUPPORTED : 0;
	dev->node_props.capability |=
		((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
		HSA_CAP_MEM_EDCSUPPORTED : 0;

	if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1))
		dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ?
			HSA_CAP_RASEVENTNOTIFY : 0;

	if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev->kfd.dev))
		dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;

	kfd_debug_print_topology();

	kfd_notify_gpu_change(gpu_id, 1);

	return 0;
}

/**
 * kfd_topology_update_io_links() - Update IO links after device removal.
 * @proximity_domain: Proximity domain value of the dev being removed.
 *
 * The topology list currently is arranged in increasing order of
 * proximity domain.
 *
 * Two things need to be done when a device is removed:
 * 1. All the IO links to this device need to be removed.
 * 2. All nodes after the current device node need to move
 *    up once this device node is removed from the topology
 *    list. As a result, the proximity domain values for
 *    all nodes after the node being deleted reduce by 1.
 *    This would also cause the proximity domain values for
 *    io links to be updated based on new proximity domain
 *    values.
 *
 * Context: The caller must hold write topology_lock.
 */
static void kfd_topology_update_io_links(int proximity_domain)
{
	struct kfd_topology_device *dev;
	struct kfd_iolink_properties *iolink, *p2plink, *tmp;

	list_for_each_entry(dev, &topology_device_list, list) {
		/* Close the numbering gap left by the removed node */
		if (dev->proximity_domain > proximity_domain)
			dev->proximity_domain--;

		list_for_each_entry_safe(iolink, tmp, &dev->io_link_props, list) {
			/*
			 * If there is an io link to the dev being deleted
			 * then remove that IO link also.
			 */
			if (iolink->node_to == proximity_domain) {
				list_del(&iolink->list);
				dev->node_props.io_links_count--;
			} else {
				/* Re-number both endpoints past the gap */
				if (iolink->node_from > proximity_domain)
					iolink->node_from--;
				if (iolink->node_to > proximity_domain)
					iolink->node_to--;
			}
		}

		list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) {
			/*
			 * If there is a p2p link to the dev being deleted
			 * then remove that p2p link also.
			 */
			if (p2plink->node_to == proximity_domain) {
				list_del(&p2plink->list);
				dev->node_props.p2p_links_count--;
			} else {
				if (p2plink->node_from > proximity_domain)
					p2plink->node_from--;
				if (p2plink->node_to > proximity_domain)
					p2plink->node_to--;
			}
		}
	}
}

/* kfd_topology_remove_device - Remove @gpu's node from the KFD topology.
 *
 * Takes topology_lock for writing, finds the node owning @gpu, removes
 * its sysfs entry, releases the device, and fixes up IO/P2P links and
 * the proximity-domain numbering of the remaining nodes.
 *
 * Return: 0 if the node was found and removed, -ENODEV otherwise.
 */
int kfd_topology_remove_device(struct kfd_node *gpu)
{
	struct kfd_topology_device *dev, *tmp;
	uint32_t gpu_id;
	int res = -ENODEV;
	int i = 0;	/* list position == proximity domain of the node */

	down_write(&topology_lock);

	list_for_each_entry_safe(dev, tmp, &topology_device_list, list) {
		if (dev->gpu == gpu) {
			gpu_id = dev->gpu_id;
			kfd_remove_sysfs_node_entry(dev);
			kfd_release_topology_device(dev);
			sys_props.num_devices--;
			kfd_topology_update_io_links(i);
			topology_crat_proximity_domain = sys_props.num_devices-1;
			sys_props.generation_count++;
			res = 0;
			if (kfd_topology_update_sysfs() < 0)
				kfd_topology_release_sysfs();
			break;
		}
		i++;
	}

	up_write(&topology_lock);

	/* gpu_id is only initialized when the node was found (res == 0) */
	if (!res)
		kfd_notify_gpu_change(gpu_id, 0);

	return res;
}

/* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
 * topology. If GPU device is found @idx, then valid kfd_dev pointer is
 * returned through @kdev
 * Return - 0: On success (@kdev will be NULL for non GPU nodes)
 *	-1: If end of list
 */
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev)
{

	struct kfd_topology_device *top_dev;
	uint8_t device_idx = 0;

	*kdev = NULL;
	down_read(&topology_lock);

	list_for_each_entry(top_dev, &topology_device_list, list) {
		if (device_idx == idx) {
			/* NULL for CPU-only nodes; lock dropped before return */
			*kdev = top_dev->gpu;
			up_read(&topology_lock);
			return 0;
		}

		device_idx++;
	}

	up_read(&topology_lock);

	return -1;

}

/* kfd_cpumask_to_apic_id - APIC ID (CPU number on non-x86, see body below)
 * of the first CPU in @cpumask; -1 if the mask is NULL or empty.
 */
static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
{
	int first_cpu_of_numa_node;

	if (!cpumask || cpumask == cpu_none_mask)
		return -1;
	first_cpu_of_numa_node = cpumask_first(cpumask);
	if (first_cpu_of_numa_node >= nr_cpu_ids)
		return -1;	/* empty mask */
#ifdef CONFIG_X86_64
	return cpu_data(first_cpu_of_numa_node).apicid;
#else
	/* No APIC id available on non-x86: use the logical CPU number */
	return first_cpu_of_numa_node;
#endif
}

/* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
 * of the given NUMA node (numa_node_id)
 * Return -1 on failure
 */
int kfd_numa_node_to_apic_id(int numa_node_id)
{
	if (numa_node_id == -1) {
		/* -1 means "no node known": fall back to any online CPU */
		pr_warn("Invalid NUMA Node. Use online CPU mask\n");
		return kfd_cpumask_to_apic_id(cpu_online_mask);
	}
	return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
}

/* kfd_double_confirm_iommu_support - Decide whether @gpu may use IOMMUv2.
 *
 * Sets gpu->use_iommu_v2 true only when the device needs an IOMMU and
 * the topology already contains an APU node (CPU cores + SIMDs) with no
 * GPU assigned yet; that node will take this GPU.
 */
void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
{
	struct kfd_topology_device *dev;

	gpu->use_iommu_v2 = false;

	if (!gpu->device_info.needs_iommu_device)
		return;

	down_read(&topology_lock);

	/* Only use IOMMUv2 if there is an APU topology node with no GPU
	 * assigned yet. This GPU will be assigned to it.
	 */
	list_for_each_entry(dev, &topology_device_list, list)
		if (dev->node_props.cpu_cores_count &&
		    dev->node_props.simd_count &&
		    !dev->gpu)
			gpu->use_iommu_v2 = true;

	up_read(&topology_lock);
}

#if defined(CONFIG_DEBUG_FS)

/* kfd_debugfs_hqds_by_device - Dump per-GPU HQD debug info to @m.
 * Stops at and returns the first non-zero dqm_debugfs_hqds() result.
 */
int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
{
	struct kfd_topology_device *dev;
	unsigned int i = 0;
	int r = 0;

	down_read(&topology_lock);

	list_for_each_entry(dev, &topology_device_list, list) {
		if (!dev->gpu) {
			/* CPU-only node: keep the index, nothing to dump */
			i++;
			continue;
		}

		seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
		r = dqm_debugfs_hqds(m, dev->gpu->dqm);
		if (r)
			break;
	}

	up_read(&topology_lock);

	return r;
}

/* kfd_debugfs_rls_by_device - Dump each GPU's runlist to @m.
 * Stops at and returns the first non-zero pm_debugfs_runlist() result.
 */
int kfd_debugfs_rls_by_device(struct seq_file *m, void *data)
{
	struct kfd_topology_device *dev;
	unsigned int i = 0;
	int r = 0;

	down_read(&topology_lock);

	list_for_each_entry(dev, &topology_device_list, list) {
		if (!dev->gpu) {
			/* CPU-only node: keep the index, nothing to dump */
			i++;
			continue;
		}

		seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
		r = pm_debugfs_runlist(m, &dev->gpu->dqm->packet_mgr);
		if (r)
			break;
	}

	up_read(&topology_lock);

	return r;
}

#endif