14a488a7aSOded Gabbay /* 24a488a7aSOded Gabbay * Copyright 2014 Advanced Micro Devices, Inc. 34a488a7aSOded Gabbay * 44a488a7aSOded Gabbay * Permission is hereby granted, free of charge, to any person obtaining a 54a488a7aSOded Gabbay * copy of this software and associated documentation files (the "Software"), 64a488a7aSOded Gabbay * to deal in the Software without restriction, including without limitation 74a488a7aSOded Gabbay * the rights to use, copy, modify, merge, publish, distribute, sublicense, 84a488a7aSOded Gabbay * and/or sell copies of the Software, and to permit persons to whom the 94a488a7aSOded Gabbay * Software is furnished to do so, subject to the following conditions: 104a488a7aSOded Gabbay * 114a488a7aSOded Gabbay * The above copyright notice and this permission notice shall be included in 124a488a7aSOded Gabbay * all copies or substantial portions of the Software. 134a488a7aSOded Gabbay * 144a488a7aSOded Gabbay * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 154a488a7aSOded Gabbay * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 164a488a7aSOded Gabbay * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 174a488a7aSOded Gabbay * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 184a488a7aSOded Gabbay * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 194a488a7aSOded Gabbay * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 204a488a7aSOded Gabbay * OTHER DEALINGS IN THE SOFTWARE. 214a488a7aSOded Gabbay */ 224a488a7aSOded Gabbay 234a488a7aSOded Gabbay #include <linux/bsearch.h> 244a488a7aSOded Gabbay #include <linux/pci.h> 254a488a7aSOded Gabbay #include <linux/slab.h> 264a488a7aSOded Gabbay #include "kfd_priv.h" 2764c7f8cfSBen Goz #include "kfd_device_queue_manager.h" 28507968ddSFelix Kuehling #include "kfd_pm4_headers_vi.h" 29fd6a440eSJonathan Kim #include "kfd_pm4_headers_aldebaran.h" 300db54b24SYong Zhao #include "cwsr_trap_handler.h" 3164d1c3a4SFelix Kuehling #include "kfd_iommu.h" 325b87245fSAmber Lin #include "amdgpu_amdkfd.h" 332c2b0d88SMukul Joshi #include "kfd_smi_events.h" 34814ab993SPhilip Yang #include "kfd_migrate.h" 355b983db8SAlex Deucher #include "amdgpu.h" 364a488a7aSOded Gabbay 3719f6d2a6SOded Gabbay #define MQD_SIZE_ALIGNED 768 38e42051d2SShaoyun Liu 39e42051d2SShaoyun Liu /* 40e42051d2SShaoyun Liu * kfd_locked is used to lock the kfd driver during suspend or reset 41e42051d2SShaoyun Liu * once locked, kfd driver will stop any further GPU execution. 42e42051d2SShaoyun Liu * create process (open) will return -EAGAIN. 43e42051d2SShaoyun Liu */ 44e42051d2SShaoyun Liu static atomic_t kfd_locked = ATOMIC_INIT(0); 4519f6d2a6SOded Gabbay 46a3e520a2SAlex Deucher #ifdef CONFIG_DRM_AMDGPU_CIK 47e392c887SYong Zhao extern const struct kfd2kgd_calls gfx_v7_kfd2kgd; 48a3e520a2SAlex Deucher #endif 49e392c887SYong Zhao extern const struct kfd2kgd_calls gfx_v8_kfd2kgd; 50e392c887SYong Zhao extern const struct kfd2kgd_calls gfx_v9_kfd2kgd; 51e392c887SYong Zhao extern const struct kfd2kgd_calls arcturus_kfd2kgd; 525073506cSJonathan Kim extern const struct kfd2kgd_calls aldebaran_kfd2kgd; 53e392c887SYong Zhao extern const struct kfd2kgd_calls gfx_v10_kfd2kgd; 543a2f0c81SYong Zhao extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd; 55e392c887SYong Zhao 5664d1c3a4SFelix Kuehling #ifdef KFD_SUPPORT_IOMMU_V2 574a488a7aSOded Gabbay static const struct kfd_device_info kaveri_device_info = { 580da7558cSBen Goz .asic_family = CHIP_KAVERI, 59c181159aSYong Zhao .asic_name = "kaveri", 609d6fa9c7SGraham Sider .gfx_target_version = 70000, 610da7558cSBen Goz .max_pasid_bits = 16, 62992839adSYair Shachar /* max num of queues for KV.TODO should be a dynamic value */ 63992839adSYair Shachar .max_no_of_hqd = 24, 64ada2b29cSFelix Kuehling .doorbell_size = 4, 650da7558cSBen Goz .ih_ring_entry_size = 4 * sizeof(uint32_t), 66f3a39818SAndrew Lewycky .event_interrupt_class = &event_interrupt_class_cik, 67fbeb661bSYair Shachar .num_of_watch_points = 4, 68373d7080SFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 69373d7080SFelix Kuehling .supports_cwsr = false, 7064d1c3a4SFelix Kuehling .needs_iommu_device = true, 713ee2d00cSFelix Kuehling .needs_pci_atomics = false, 7298bb9222SYong Zhao .num_sdma_engines = 2, 731b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 74d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 750da7558cSBen Goz }; 760da7558cSBen Goz 770da7558cSBen Goz static const struct kfd_device_info carrizo_device_info = { 780da7558cSBen Goz .asic_family = CHIP_CARRIZO, 79c181159aSYong Zhao .asic_name = "carrizo", 809d6fa9c7SGraham Sider .gfx_target_version = 80001, 814a488a7aSOded Gabbay .max_pasid_bits = 16, 82eaccd6e7SOded Gabbay /* max num of queues for CZ.TODO should be a dynamic value */ 83eaccd6e7SOded Gabbay .max_no_of_hqd = 24, 84ada2b29cSFelix Kuehling .doorbell_size = 4, 85b3f5e6b4SAndrew Lewycky .ih_ring_entry_size = 4 * sizeof(uint32_t), 86eaccd6e7SOded Gabbay .event_interrupt_class = &event_interrupt_class_cik, 87f7c826adSAlexey Skidanov .num_of_watch_points = 4, 88373d7080SFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 89373d7080SFelix Kuehling .supports_cwsr = true, 9064d1c3a4SFelix Kuehling .needs_iommu_device = true, 913ee2d00cSFelix Kuehling .needs_pci_atomics = false, 9298bb9222SYong Zhao .num_sdma_engines = 2, 931b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 94d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 954a488a7aSOded Gabbay }; 964d663df6SYong Zhao 974d663df6SYong Zhao static const struct kfd_device_info raven_device_info = { 984d663df6SYong Zhao .asic_family = CHIP_RAVEN, 99c181159aSYong Zhao .asic_name = "raven", 1009d6fa9c7SGraham Sider .gfx_target_version = 90002, 1014d663df6SYong Zhao .max_pasid_bits = 16, 1024d663df6SYong Zhao .max_no_of_hqd = 24, 1034d663df6SYong Zhao .doorbell_size = 8, 1044d663df6SYong Zhao .ih_ring_entry_size = 8 * sizeof(uint32_t), 1054d663df6SYong Zhao .event_interrupt_class = &event_interrupt_class_v9, 1064d663df6SYong Zhao .num_of_watch_points = 4, 1074d663df6SYong Zhao .mqd_size_aligned = MQD_SIZE_ALIGNED, 1084d663df6SYong Zhao .supports_cwsr = true, 1094d663df6SYong Zhao .needs_iommu_device = true, 1104d663df6SYong Zhao .needs_pci_atomics = true, 1114d663df6SYong Zhao .num_sdma_engines = 1, 1121b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 113d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 1144d663df6SYong Zhao }; 1150f3d2b68SAlex Deucher #endif 1164a488a7aSOded Gabbay 11718f12604SAlex Deucher #ifdef CONFIG_DRM_AMDGPU_CIK 118a3084e6cSFelix Kuehling static const struct kfd_device_info hawaii_device_info = { 119a3084e6cSFelix Kuehling .asic_family = CHIP_HAWAII, 120c181159aSYong Zhao .asic_name = "hawaii", 1219d6fa9c7SGraham Sider .gfx_target_version = 70001, 122a3084e6cSFelix Kuehling .max_pasid_bits = 16, 123a3084e6cSFelix Kuehling /* max num of queues for KV.TODO should be a dynamic value */ 124a3084e6cSFelix Kuehling .max_no_of_hqd = 24, 125ada2b29cSFelix Kuehling .doorbell_size = 4, 126a3084e6cSFelix Kuehling .ih_ring_entry_size = 4 * sizeof(uint32_t), 127a3084e6cSFelix Kuehling .event_interrupt_class = &event_interrupt_class_cik, 128a3084e6cSFelix Kuehling .num_of_watch_points = 4, 129a3084e6cSFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 130a3084e6cSFelix Kuehling .supports_cwsr = false, 13164d1c3a4SFelix Kuehling .needs_iommu_device = false, 132a3084e6cSFelix Kuehling .needs_pci_atomics = false, 13398bb9222SYong Zhao .num_sdma_engines = 2, 1341b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 135d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 136a3084e6cSFelix Kuehling }; 13718f12604SAlex Deucher #endif 138a3084e6cSFelix Kuehling 139a3084e6cSFelix Kuehling static const struct kfd_device_info tonga_device_info = { 140a3084e6cSFelix Kuehling .asic_family = CHIP_TONGA, 141c181159aSYong Zhao .asic_name = "tonga", 1429d6fa9c7SGraham Sider .gfx_target_version = 80002, 143a3084e6cSFelix Kuehling .max_pasid_bits = 16, 144a3084e6cSFelix Kuehling .max_no_of_hqd = 24, 145ada2b29cSFelix Kuehling .doorbell_size = 4, 146a3084e6cSFelix Kuehling .ih_ring_entry_size = 4 * sizeof(uint32_t), 147a3084e6cSFelix Kuehling .event_interrupt_class = &event_interrupt_class_cik, 148a3084e6cSFelix Kuehling .num_of_watch_points = 4, 149a3084e6cSFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 150a3084e6cSFelix Kuehling .supports_cwsr = false, 15164d1c3a4SFelix Kuehling .needs_iommu_device = false, 152a3084e6cSFelix Kuehling .needs_pci_atomics = true, 15398bb9222SYong Zhao .num_sdma_engines = 2, 1541b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 155d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 156a3084e6cSFelix Kuehling }; 157a3084e6cSFelix Kuehling 158a3084e6cSFelix Kuehling static const struct kfd_device_info fiji_device_info = { 159a3084e6cSFelix Kuehling .asic_family = CHIP_FIJI, 160c181159aSYong Zhao .asic_name = "fiji", 1619d6fa9c7SGraham Sider .gfx_target_version = 80003, 162a3084e6cSFelix Kuehling .max_pasid_bits = 16, 163a3084e6cSFelix Kuehling .max_no_of_hqd = 24, 164ada2b29cSFelix Kuehling .doorbell_size = 4, 165a3084e6cSFelix Kuehling .ih_ring_entry_size = 4 * sizeof(uint32_t), 166a3084e6cSFelix Kuehling .event_interrupt_class = &event_interrupt_class_cik, 167a3084e6cSFelix Kuehling .num_of_watch_points = 4, 168a3084e6cSFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 169a3084e6cSFelix Kuehling .supports_cwsr = true, 17064d1c3a4SFelix Kuehling .needs_iommu_device = false, 171a3084e6cSFelix Kuehling .needs_pci_atomics = true, 17298bb9222SYong Zhao .num_sdma_engines = 2, 1731b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 174d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 175a3084e6cSFelix Kuehling }; 176a3084e6cSFelix Kuehling 177a3084e6cSFelix Kuehling static const struct kfd_device_info fiji_vf_device_info = { 178a3084e6cSFelix Kuehling .asic_family = CHIP_FIJI, 179c181159aSYong Zhao .asic_name = "fiji", 1809d6fa9c7SGraham Sider .gfx_target_version = 80003, 181a3084e6cSFelix Kuehling .max_pasid_bits = 16, 182a3084e6cSFelix Kuehling .max_no_of_hqd = 24, 183ada2b29cSFelix Kuehling .doorbell_size = 4, 184a3084e6cSFelix Kuehling .ih_ring_entry_size = 4 * sizeof(uint32_t), 185a3084e6cSFelix Kuehling .event_interrupt_class = &event_interrupt_class_cik, 186a3084e6cSFelix Kuehling .num_of_watch_points = 4, 187a3084e6cSFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 188a3084e6cSFelix Kuehling .supports_cwsr = true, 18964d1c3a4SFelix Kuehling .needs_iommu_device = false, 190a3084e6cSFelix Kuehling .needs_pci_atomics = false, 19198bb9222SYong Zhao .num_sdma_engines = 2, 1921b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 193d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 194a3084e6cSFelix Kuehling }; 195a3084e6cSFelix Kuehling 196a3084e6cSFelix Kuehling 197a3084e6cSFelix Kuehling static const struct kfd_device_info polaris10_device_info = { 198a3084e6cSFelix Kuehling .asic_family = CHIP_POLARIS10, 199c181159aSYong Zhao .asic_name = "polaris10", 2009d6fa9c7SGraham Sider .gfx_target_version = 80003, 201a3084e6cSFelix Kuehling .max_pasid_bits = 16, 202a3084e6cSFelix Kuehling .max_no_of_hqd = 24, 203ada2b29cSFelix Kuehling .doorbell_size = 4, 204a3084e6cSFelix Kuehling .ih_ring_entry_size = 4 * sizeof(uint32_t), 205a3084e6cSFelix Kuehling .event_interrupt_class = &event_interrupt_class_cik, 206a3084e6cSFelix Kuehling .num_of_watch_points = 4, 207a3084e6cSFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 208a3084e6cSFelix Kuehling .supports_cwsr = true, 20964d1c3a4SFelix Kuehling .needs_iommu_device = false, 210a3084e6cSFelix Kuehling .needs_pci_atomics = true, 21198bb9222SYong Zhao .num_sdma_engines = 2, 2121b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 213d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 214a3084e6cSFelix Kuehling }; 215a3084e6cSFelix Kuehling 216a3084e6cSFelix Kuehling static const struct kfd_device_info polaris10_vf_device_info = { 217a3084e6cSFelix Kuehling .asic_family = CHIP_POLARIS10, 218c181159aSYong Zhao .asic_name = "polaris10", 2199d6fa9c7SGraham Sider .gfx_target_version = 80003, 220a3084e6cSFelix Kuehling .max_pasid_bits = 16, 221a3084e6cSFelix Kuehling .max_no_of_hqd = 24, 222ada2b29cSFelix Kuehling .doorbell_size = 4, 223a3084e6cSFelix Kuehling .ih_ring_entry_size = 4 * sizeof(uint32_t), 224a3084e6cSFelix Kuehling .event_interrupt_class = &event_interrupt_class_cik, 225a3084e6cSFelix Kuehling .num_of_watch_points = 4, 226a3084e6cSFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 227a3084e6cSFelix Kuehling .supports_cwsr = true, 22864d1c3a4SFelix Kuehling .needs_iommu_device = false, 229a3084e6cSFelix Kuehling .needs_pci_atomics = false, 23098bb9222SYong Zhao .num_sdma_engines = 2, 2311b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 232d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 233a3084e6cSFelix Kuehling }; 234a3084e6cSFelix Kuehling 235a3084e6cSFelix Kuehling static const struct kfd_device_info polaris11_device_info = { 236a3084e6cSFelix Kuehling .asic_family = CHIP_POLARIS11, 237c181159aSYong Zhao .asic_name = "polaris11", 2389d6fa9c7SGraham Sider .gfx_target_version = 80003, 239a3084e6cSFelix Kuehling .max_pasid_bits = 16, 240a3084e6cSFelix Kuehling .max_no_of_hqd = 24, 241ada2b29cSFelix Kuehling .doorbell_size = 4, 242a3084e6cSFelix Kuehling .ih_ring_entry_size = 4 * sizeof(uint32_t), 243a3084e6cSFelix Kuehling .event_interrupt_class = &event_interrupt_class_cik, 244a3084e6cSFelix Kuehling .num_of_watch_points = 4, 245a3084e6cSFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 246a3084e6cSFelix Kuehling .supports_cwsr = true, 24764d1c3a4SFelix Kuehling .needs_iommu_device = false, 248a3084e6cSFelix Kuehling .needs_pci_atomics = true, 24998bb9222SYong Zhao .num_sdma_engines = 2, 2501b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 251d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 252a3084e6cSFelix Kuehling }; 253a3084e6cSFelix Kuehling 254846a44d7SGang Ba static const struct kfd_device_info polaris12_device_info = { 255846a44d7SGang Ba .asic_family = CHIP_POLARIS12, 256c181159aSYong Zhao .asic_name = "polaris12", 2579d6fa9c7SGraham Sider .gfx_target_version = 80003, 258846a44d7SGang Ba .max_pasid_bits = 16, 259846a44d7SGang Ba .max_no_of_hqd = 24, 260846a44d7SGang Ba .doorbell_size = 4, 261846a44d7SGang Ba .ih_ring_entry_size = 4 * sizeof(uint32_t), 262846a44d7SGang Ba .event_interrupt_class = &event_interrupt_class_cik, 263846a44d7SGang Ba .num_of_watch_points = 4, 264846a44d7SGang Ba .mqd_size_aligned = MQD_SIZE_ALIGNED, 265846a44d7SGang Ba .supports_cwsr = true, 266846a44d7SGang Ba .needs_iommu_device = false, 267846a44d7SGang Ba .needs_pci_atomics = true, 268846a44d7SGang Ba .num_sdma_engines = 2, 2691b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 270846a44d7SGang Ba .num_sdma_queues_per_engine = 2, 271846a44d7SGang Ba }; 272846a44d7SGang Ba 273ed81cd6eSKent Russell static const struct kfd_device_info vegam_device_info = { 274ed81cd6eSKent Russell .asic_family = CHIP_VEGAM, 275c181159aSYong Zhao .asic_name = "vegam", 2769d6fa9c7SGraham Sider .gfx_target_version = 80003, 277ed81cd6eSKent Russell .max_pasid_bits = 16, 278ed81cd6eSKent Russell .max_no_of_hqd = 24, 279ed81cd6eSKent Russell .doorbell_size = 4, 280ed81cd6eSKent Russell .ih_ring_entry_size = 4 * sizeof(uint32_t), 281ed81cd6eSKent Russell .event_interrupt_class = &event_interrupt_class_cik, 282ed81cd6eSKent Russell .num_of_watch_points = 4, 283ed81cd6eSKent Russell .mqd_size_aligned = MQD_SIZE_ALIGNED, 284ed81cd6eSKent Russell .supports_cwsr = true, 285ed81cd6eSKent Russell .needs_iommu_device = false, 286ed81cd6eSKent Russell .needs_pci_atomics = true, 287ed81cd6eSKent Russell .num_sdma_engines = 2, 288ed81cd6eSKent Russell .num_xgmi_sdma_engines = 0, 289a3084e6cSFelix Kuehling .num_sdma_queues_per_engine = 2, 290a3084e6cSFelix Kuehling }; 291a3084e6cSFelix Kuehling 292389056e5SFelix Kuehling static const struct kfd_device_info vega10_device_info = { 293389056e5SFelix Kuehling .asic_family = CHIP_VEGA10, 294c181159aSYong Zhao .asic_name = "vega10", 2959d6fa9c7SGraham Sider .gfx_target_version = 90000, 296389056e5SFelix Kuehling .max_pasid_bits = 16, 297389056e5SFelix Kuehling .max_no_of_hqd = 24, 298389056e5SFelix Kuehling .doorbell_size = 8, 299389056e5SFelix Kuehling .ih_ring_entry_size = 8 * sizeof(uint32_t), 300389056e5SFelix Kuehling .event_interrupt_class = &event_interrupt_class_v9, 301389056e5SFelix Kuehling .num_of_watch_points = 4, 302389056e5SFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 303389056e5SFelix Kuehling .supports_cwsr = true, 304389056e5SFelix Kuehling .needs_iommu_device = false, 305389056e5SFelix Kuehling .needs_pci_atomics = false, 30698bb9222SYong Zhao .num_sdma_engines = 2, 3071b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 308d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 309389056e5SFelix Kuehling }; 310389056e5SFelix Kuehling 311389056e5SFelix Kuehling static const struct kfd_device_info vega10_vf_device_info = { 312389056e5SFelix Kuehling .asic_family = CHIP_VEGA10, 313c181159aSYong Zhao .asic_name = "vega10", 3149d6fa9c7SGraham Sider .gfx_target_version = 90000, 315389056e5SFelix Kuehling .max_pasid_bits = 16, 316389056e5SFelix Kuehling .max_no_of_hqd = 24, 317389056e5SFelix Kuehling .doorbell_size = 8, 318389056e5SFelix Kuehling .ih_ring_entry_size = 8 * sizeof(uint32_t), 319389056e5SFelix Kuehling .event_interrupt_class = &event_interrupt_class_v9, 320389056e5SFelix Kuehling .num_of_watch_points = 4, 321389056e5SFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 322389056e5SFelix Kuehling .supports_cwsr = true, 323389056e5SFelix Kuehling .needs_iommu_device = false, 324389056e5SFelix Kuehling .needs_pci_atomics = false, 32598bb9222SYong Zhao .num_sdma_engines = 2, 3261b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 327d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 328389056e5SFelix Kuehling }; 329389056e5SFelix Kuehling 330846a44d7SGang Ba static const struct kfd_device_info vega12_device_info = { 331846a44d7SGang Ba .asic_family = CHIP_VEGA12, 332c181159aSYong Zhao .asic_name = "vega12", 3339d6fa9c7SGraham Sider .gfx_target_version = 90004, 334846a44d7SGang Ba .max_pasid_bits = 16, 335846a44d7SGang Ba .max_no_of_hqd = 24, 336846a44d7SGang Ba .doorbell_size = 8, 337846a44d7SGang Ba .ih_ring_entry_size = 8 * sizeof(uint32_t), 338846a44d7SGang Ba .event_interrupt_class = &event_interrupt_class_v9, 339846a44d7SGang Ba .num_of_watch_points = 4, 340846a44d7SGang Ba .mqd_size_aligned = MQD_SIZE_ALIGNED, 341846a44d7SGang Ba .supports_cwsr = true, 342846a44d7SGang Ba .needs_iommu_device = false, 343846a44d7SGang Ba .needs_pci_atomics = false, 344846a44d7SGang Ba .num_sdma_engines = 2, 3451b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 346846a44d7SGang Ba .num_sdma_queues_per_engine = 2, 347846a44d7SGang Ba }; 348846a44d7SGang Ba 34922a3a294SShaoyun Liu static const struct kfd_device_info vega20_device_info = { 35022a3a294SShaoyun Liu .asic_family = CHIP_VEGA20, 351c181159aSYong Zhao .asic_name = "vega20", 3529d6fa9c7SGraham Sider .gfx_target_version = 90006, 35322a3a294SShaoyun Liu .max_pasid_bits = 16, 35422a3a294SShaoyun Liu .max_no_of_hqd = 24, 35522a3a294SShaoyun Liu .doorbell_size = 8, 35622a3a294SShaoyun Liu .ih_ring_entry_size = 8 * sizeof(uint32_t), 35722a3a294SShaoyun Liu .event_interrupt_class = &event_interrupt_class_v9, 35822a3a294SShaoyun Liu .num_of_watch_points = 4, 35922a3a294SShaoyun Liu .mqd_size_aligned = MQD_SIZE_ALIGNED, 36022a3a294SShaoyun Liu .supports_cwsr = true, 36122a3a294SShaoyun Liu .needs_iommu_device = false, 362006a0b3dSShaoyun Liu .needs_pci_atomics = false, 36322a3a294SShaoyun Liu .num_sdma_engines = 2, 3641b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 36522a3a294SShaoyun Liu .num_sdma_queues_per_engine = 8, 36622a3a294SShaoyun Liu }; 36722a3a294SShaoyun Liu 36849adcf8aSYong Zhao static const struct kfd_device_info arcturus_device_info = { 36949adcf8aSYong Zhao .asic_family = CHIP_ARCTURUS, 370c181159aSYong Zhao .asic_name = "arcturus", 3719d6fa9c7SGraham Sider .gfx_target_version = 90008, 37249adcf8aSYong Zhao .max_pasid_bits = 16, 37349adcf8aSYong Zhao .max_no_of_hqd = 24, 37449adcf8aSYong Zhao .doorbell_size = 8, 37549adcf8aSYong Zhao .ih_ring_entry_size = 8 * sizeof(uint32_t), 37649adcf8aSYong Zhao .event_interrupt_class = &event_interrupt_class_v9, 37749adcf8aSYong Zhao .num_of_watch_points = 4, 37849adcf8aSYong Zhao .mqd_size_aligned = MQD_SIZE_ALIGNED, 37949adcf8aSYong Zhao .supports_cwsr = true, 38049adcf8aSYong Zhao .needs_iommu_device = false, 38149adcf8aSYong Zhao .needs_pci_atomics = false, 382b6689cf7SOak Zeng .num_sdma_engines = 2, 383b6689cf7SOak Zeng .num_xgmi_sdma_engines = 6, 38449adcf8aSYong Zhao .num_sdma_queues_per_engine = 8, 38549adcf8aSYong Zhao }; 38649adcf8aSYong Zhao 38736e22d59SYong Zhao static const struct kfd_device_info aldebaran_device_info = { 38836e22d59SYong Zhao .asic_family = CHIP_ALDEBARAN, 38936e22d59SYong Zhao .asic_name = "aldebaran", 3909d6fa9c7SGraham Sider .gfx_target_version = 90010, 39136e22d59SYong Zhao .max_pasid_bits = 16, 39236e22d59SYong Zhao .max_no_of_hqd = 24, 39336e22d59SYong Zhao .doorbell_size = 8, 39436e22d59SYong Zhao .ih_ring_entry_size = 8 * sizeof(uint32_t), 39536e22d59SYong Zhao .event_interrupt_class = &event_interrupt_class_v9, 39636e22d59SYong Zhao .num_of_watch_points = 4, 39736e22d59SYong Zhao .mqd_size_aligned = MQD_SIZE_ALIGNED, 39836e22d59SYong Zhao .supports_cwsr = true, 39936e22d59SYong Zhao .needs_iommu_device = false, 40036e22d59SYong Zhao .needs_pci_atomics = false, 40136e22d59SYong Zhao .num_sdma_engines = 2, 40236e22d59SYong Zhao .num_xgmi_sdma_engines = 3, 40336e22d59SYong Zhao .num_sdma_queues_per_engine = 8, 40436e22d59SYong Zhao }; 40536e22d59SYong Zhao 4062b9c2211SHuang Rui static const struct kfd_device_info renoir_device_info = { 4072b9c2211SHuang Rui .asic_family = CHIP_RENOIR, 408acb9acbeSHuang Rui .asic_name = "renoir", 409cc22b927SGraham Sider .gfx_target_version = 90012, 4102b9c2211SHuang Rui .max_pasid_bits = 16, 4112b9c2211SHuang Rui .max_no_of_hqd = 24, 4122b9c2211SHuang Rui .doorbell_size = 8, 4132b9c2211SHuang Rui .ih_ring_entry_size = 8 * sizeof(uint32_t), 4142b9c2211SHuang Rui .event_interrupt_class = &event_interrupt_class_v9, 4152b9c2211SHuang Rui .num_of_watch_points = 4, 4162b9c2211SHuang Rui .mqd_size_aligned = MQD_SIZE_ALIGNED, 4172b9c2211SHuang Rui .supports_cwsr = true, 4182b9c2211SHuang Rui .needs_iommu_device = false, 4192b9c2211SHuang Rui .needs_pci_atomics = false, 4202b9c2211SHuang Rui .num_sdma_engines = 1, 4212b9c2211SHuang Rui .num_xgmi_sdma_engines = 0, 4222b9c2211SHuang Rui .num_sdma_queues_per_engine = 2, 4232b9c2211SHuang Rui }; 4242b9c2211SHuang Rui 42514328aa5SPhilip Cox static const struct kfd_device_info navi10_device_info = { 42614328aa5SPhilip Cox .asic_family = CHIP_NAVI10, 427c181159aSYong Zhao .asic_name = "navi10", 4289d6fa9c7SGraham Sider .gfx_target_version = 100100, 42914328aa5SPhilip Cox .max_pasid_bits = 16, 43014328aa5SPhilip Cox .max_no_of_hqd = 24, 43114328aa5SPhilip Cox .doorbell_size = 8, 43214328aa5SPhilip Cox .ih_ring_entry_size = 8 * sizeof(uint32_t), 43314328aa5SPhilip Cox .event_interrupt_class = &event_interrupt_class_v9, 43414328aa5SPhilip Cox .num_of_watch_points = 4, 43514328aa5SPhilip Cox .mqd_size_aligned = MQD_SIZE_ALIGNED, 43614328aa5SPhilip Cox .needs_iommu_device = false, 43714328aa5SPhilip Cox .supports_cwsr = true, 4386cc980e3SHarish Kasiviswanathan .needs_pci_atomics = true, 439e312af6cSFelix Kuehling .no_atomic_fw_version = 145, 44014328aa5SPhilip Cox .num_sdma_engines = 2, 44114328aa5SPhilip Cox .num_xgmi_sdma_engines = 0, 44214328aa5SPhilip Cox .num_sdma_queues_per_engine = 8, 44314328aa5SPhilip Cox }; 44414328aa5SPhilip Cox 445b77fb9d8Sshaoyunl static const struct kfd_device_info navi12_device_info = { 4460e94b564Sshaoyunl .asic_family = CHIP_NAVI12, 447b77fb9d8Sshaoyunl .asic_name = "navi12", 4489d6fa9c7SGraham Sider .gfx_target_version = 100101, 449b77fb9d8Sshaoyunl .max_pasid_bits = 16, 450b77fb9d8Sshaoyunl .max_no_of_hqd = 24, 451b77fb9d8Sshaoyunl .doorbell_size = 8, 452b77fb9d8Sshaoyunl .ih_ring_entry_size = 8 * sizeof(uint32_t), 453b77fb9d8Sshaoyunl .event_interrupt_class = &event_interrupt_class_v9, 454b77fb9d8Sshaoyunl .num_of_watch_points = 4, 455b77fb9d8Sshaoyunl .mqd_size_aligned = MQD_SIZE_ALIGNED, 456b77fb9d8Sshaoyunl .needs_iommu_device = false, 457b77fb9d8Sshaoyunl .supports_cwsr = true, 4586cc980e3SHarish Kasiviswanathan .needs_pci_atomics = true, 459e312af6cSFelix Kuehling .no_atomic_fw_version = 145, 460b77fb9d8Sshaoyunl .num_sdma_engines = 2, 461b77fb9d8Sshaoyunl .num_xgmi_sdma_engines = 0, 462b77fb9d8Sshaoyunl .num_sdma_queues_per_engine = 8, 463b77fb9d8Sshaoyunl }; 464b77fb9d8Sshaoyunl 4658099ae40SYong Zhao static const struct kfd_device_info navi14_device_info = { 4668099ae40SYong Zhao .asic_family = CHIP_NAVI14, 4678099ae40SYong Zhao .asic_name = "navi14", 4689d6fa9c7SGraham Sider .gfx_target_version = 100102, 4698099ae40SYong Zhao .max_pasid_bits = 16, 4708099ae40SYong Zhao .max_no_of_hqd = 24, 4718099ae40SYong Zhao .doorbell_size = 8, 4728099ae40SYong Zhao .ih_ring_entry_size = 8 * sizeof(uint32_t), 4738099ae40SYong Zhao .event_interrupt_class = &event_interrupt_class_v9, 4748099ae40SYong Zhao .num_of_watch_points = 4, 4758099ae40SYong Zhao .mqd_size_aligned = MQD_SIZE_ALIGNED, 4768099ae40SYong Zhao .needs_iommu_device = false, 4778099ae40SYong Zhao .supports_cwsr = true, 4786cc980e3SHarish Kasiviswanathan .needs_pci_atomics = true, 479e312af6cSFelix Kuehling .no_atomic_fw_version = 145, 4808099ae40SYong Zhao .num_sdma_engines = 2, 4818099ae40SYong Zhao .num_xgmi_sdma_engines = 0, 4828099ae40SYong Zhao .num_sdma_queues_per_engine = 8, 4838099ae40SYong Zhao }; 4848099ae40SYong Zhao 4853a2f0c81SYong Zhao static const struct kfd_device_info sienna_cichlid_device_info = { 4863a2f0c81SYong Zhao .asic_family = CHIP_SIENNA_CICHLID, 4873a2f0c81SYong Zhao .asic_name = "sienna_cichlid", 4889d6fa9c7SGraham Sider .gfx_target_version = 100300, 4893a2f0c81SYong Zhao .max_pasid_bits = 16, 4903a2f0c81SYong Zhao .max_no_of_hqd = 24, 4913a2f0c81SYong Zhao .doorbell_size = 8, 4923a2f0c81SYong Zhao .ih_ring_entry_size = 8 * sizeof(uint32_t), 4933a2f0c81SYong Zhao .event_interrupt_class = &event_interrupt_class_v9, 4943a2f0c81SYong Zhao .num_of_watch_points = 4, 4953a2f0c81SYong Zhao .mqd_size_aligned = MQD_SIZE_ALIGNED, 4963a2f0c81SYong Zhao .needs_iommu_device = false, 4973a2f0c81SYong Zhao .supports_cwsr = true, 4986cc980e3SHarish Kasiviswanathan .needs_pci_atomics = true, 499e312af6cSFelix Kuehling .no_atomic_fw_version = 92, 5003a2f0c81SYong Zhao .num_sdma_engines = 4, 5013a2f0c81SYong Zhao .num_xgmi_sdma_engines = 0, 5023a2f0c81SYong Zhao .num_sdma_queues_per_engine = 8, 5033a2f0c81SYong Zhao }; 5043a2f0c81SYong Zhao 505de89b2e4SChengming Gui static const struct kfd_device_info navy_flounder_device_info = { 506de89b2e4SChengming Gui .asic_family = CHIP_NAVY_FLOUNDER, 507de89b2e4SChengming Gui .asic_name = "navy_flounder", 5089d6fa9c7SGraham Sider .gfx_target_version = 100301, 509de89b2e4SChengming Gui .max_pasid_bits = 16, 510de89b2e4SChengming Gui .max_no_of_hqd = 24, 511de89b2e4SChengming Gui .doorbell_size = 8, 512de89b2e4SChengming Gui .ih_ring_entry_size = 8 * sizeof(uint32_t), 513de89b2e4SChengming Gui .event_interrupt_class = &event_interrupt_class_v9, 514de89b2e4SChengming Gui .num_of_watch_points = 4, 515de89b2e4SChengming Gui .mqd_size_aligned = MQD_SIZE_ALIGNED, 516de89b2e4SChengming Gui .needs_iommu_device = false, 517de89b2e4SChengming Gui .supports_cwsr = true, 5186cc980e3SHarish Kasiviswanathan .needs_pci_atomics = true, 519e312af6cSFelix Kuehling .no_atomic_fw_version = 92, 520de89b2e4SChengming Gui .num_sdma_engines = 2, 521de89b2e4SChengming Gui .num_xgmi_sdma_engines = 0, 522de89b2e4SChengming Gui .num_sdma_queues_per_engine = 8, 523de89b2e4SChengming Gui }; 524de89b2e4SChengming Gui 5253a5e715dSHuang Rui static const struct kfd_device_info vangogh_device_info = { 5263a5e715dSHuang Rui .asic_family = CHIP_VANGOGH, 5273a5e715dSHuang Rui .asic_name = "vangogh", 5289d6fa9c7SGraham Sider .gfx_target_version = 100303, 5293a5e715dSHuang Rui .max_pasid_bits = 16, 5303a5e715dSHuang Rui .max_no_of_hqd = 24, 5313a5e715dSHuang Rui .doorbell_size = 8, 5323a5e715dSHuang Rui .ih_ring_entry_size = 8 * sizeof(uint32_t), 5333a5e715dSHuang Rui .event_interrupt_class = &event_interrupt_class_v9, 5343a5e715dSHuang Rui .num_of_watch_points = 4, 5353a5e715dSHuang Rui .mqd_size_aligned = MQD_SIZE_ALIGNED, 5363a5e715dSHuang Rui .needs_iommu_device = false, 5373a5e715dSHuang Rui .supports_cwsr = true, 538e312af6cSFelix Kuehling .needs_pci_atomics = true, 539e312af6cSFelix Kuehling .no_atomic_fw_version = 92, 5403a5e715dSHuang Rui .num_sdma_engines = 1, 5413a5e715dSHuang Rui .num_xgmi_sdma_engines = 0, 5423a5e715dSHuang Rui .num_sdma_queues_per_engine = 2, 5433a5e715dSHuang Rui }; 5443a5e715dSHuang Rui 545eb5a34d4SChengming Gui static const struct kfd_device_info dimgrey_cavefish_device_info = { 546eb5a34d4SChengming Gui .asic_family = CHIP_DIMGREY_CAVEFISH, 547eb5a34d4SChengming Gui .asic_name = "dimgrey_cavefish", 5489d6fa9c7SGraham Sider .gfx_target_version = 100302, 549eb5a34d4SChengming Gui .max_pasid_bits = 16, 550eb5a34d4SChengming Gui .max_no_of_hqd = 24, 551eb5a34d4SChengming Gui .doorbell_size = 8, 552eb5a34d4SChengming Gui .ih_ring_entry_size = 8 * sizeof(uint32_t), 553eb5a34d4SChengming Gui .event_interrupt_class = &event_interrupt_class_v9, 554eb5a34d4SChengming Gui .num_of_watch_points = 4, 555eb5a34d4SChengming Gui .mqd_size_aligned = MQD_SIZE_ALIGNED, 556eb5a34d4SChengming Gui .needs_iommu_device = false, 557eb5a34d4SChengming Gui .supports_cwsr = true, 5586cc980e3SHarish Kasiviswanathan .needs_pci_atomics = true, 559e312af6cSFelix Kuehling .no_atomic_fw_version = 92, 560eb5a34d4SChengming Gui .num_sdma_engines = 2, 561eb5a34d4SChengming Gui .num_xgmi_sdma_engines = 0, 562eb5a34d4SChengming Gui .num_sdma_queues_per_engine = 8, 563eb5a34d4SChengming Gui }; 564eb5a34d4SChengming Gui 5655cf607ccSChengming Gui static const struct kfd_device_info beige_goby_device_info = { 5665cf607ccSChengming Gui .asic_family = CHIP_BEIGE_GOBY, 5675cf607ccSChengming Gui .asic_name = "beige_goby", 5689d6fa9c7SGraham Sider .gfx_target_version = 100304, 5695cf607ccSChengming Gui .max_pasid_bits = 16, 5705cf607ccSChengming Gui .max_no_of_hqd = 24, 5715cf607ccSChengming Gui .doorbell_size = 8, 5725cf607ccSChengming Gui .ih_ring_entry_size = 8 * sizeof(uint32_t), 5735cf607ccSChengming Gui .event_interrupt_class = &event_interrupt_class_v9, 5745cf607ccSChengming Gui .num_of_watch_points = 4, 5755cf607ccSChengming Gui .mqd_size_aligned = MQD_SIZE_ALIGNED, 5765cf607ccSChengming Gui .needs_iommu_device = false, 5775cf607ccSChengming Gui .supports_cwsr = true, 5785cf607ccSChengming Gui .needs_pci_atomics = true, 579e312af6cSFelix Kuehling .no_atomic_fw_version = 92, 5805cf607ccSChengming Gui .num_sdma_engines = 1, 5815cf607ccSChengming Gui .num_xgmi_sdma_engines = 0, 5825cf607ccSChengming Gui .num_sdma_queues_per_engine = 8, 5835cf607ccSChengming Gui }; 5845cf607ccSChengming Gui 585bf9d4e88SAaron Liu static const struct kfd_device_info yellow_carp_device_info = { 586bf9d4e88SAaron Liu .asic_family = CHIP_YELLOW_CARP, 587bf9d4e88SAaron Liu .asic_name = "yellow_carp", 5889d6fa9c7SGraham Sider .gfx_target_version = 100305, 589bf9d4e88SAaron Liu .max_pasid_bits = 16, 590bf9d4e88SAaron Liu .max_no_of_hqd = 24, 591bf9d4e88SAaron Liu .doorbell_size = 8, 592bf9d4e88SAaron Liu .ih_ring_entry_size = 8 * sizeof(uint32_t), 593bf9d4e88SAaron Liu .event_interrupt_class = &event_interrupt_class_v9, 594bf9d4e88SAaron Liu .num_of_watch_points = 4, 595bf9d4e88SAaron Liu .mqd_size_aligned = MQD_SIZE_ALIGNED, 596bf9d4e88SAaron Liu .needs_iommu_device = false, 597bf9d4e88SAaron Liu .supports_cwsr = true, 598e312af6cSFelix Kuehling .needs_pci_atomics = true, 599e312af6cSFelix Kuehling .no_atomic_fw_version = 92, 600bf9d4e88SAaron Liu .num_sdma_engines = 1, 601bf9d4e88SAaron Liu .num_xgmi_sdma_engines = 0, 602bf9d4e88SAaron Liu .num_sdma_queues_per_engine = 2, 603bf9d4e88SAaron Liu }; 604eb5a34d4SChengming Gui 60506e75b88STao Zhou static const struct kfd_device_info cyan_skillfish_device_info = { 60606e75b88STao Zhou .asic_family = CHIP_CYAN_SKILLFISH, 60706e75b88STao Zhou .asic_name = "cyan_skillfish", 6089d6fa9c7SGraham Sider .gfx_target_version = 100103, 60906e75b88STao Zhou .max_pasid_bits = 16, 61006e75b88STao Zhou .max_no_of_hqd = 24, 61106e75b88STao Zhou .doorbell_size = 8, 61206e75b88STao Zhou .ih_ring_entry_size = 8 * sizeof(uint32_t), 61306e75b88STao Zhou .event_interrupt_class = &event_interrupt_class_v9, 61406e75b88STao Zhou .num_of_watch_points = 4, 61506e75b88STao Zhou .mqd_size_aligned = MQD_SIZE_ALIGNED, 61606e75b88STao Zhou .needs_iommu_device = false, 61706e75b88STao Zhou .supports_cwsr = true, 61806e75b88STao Zhou .needs_pci_atomics = true, 61906e75b88STao Zhou .num_sdma_engines = 2, 62006e75b88STao Zhou .num_xgmi_sdma_engines = 0, 62106e75b88STao Zhou .num_sdma_queues_per_engine = 8, 62206e75b88STao Zhou }; 62306e75b88STao Zhou 6246e81090bSOded Gabbay static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, 6256e81090bSOded Gabbay unsigned int chunk_size); 6266e81090bSOded Gabbay static void kfd_gtt_sa_fini(struct kfd_dev *kfd); 6276e81090bSOded Gabbay 628b8935a7cSYong Zhao static int kfd_resume(struct kfd_dev *kfd); 629b8935a7cSYong Zhao 630b5d1d755SGraham Sider struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) 6314a488a7aSOded Gabbay { 6324a488a7aSOded Gabbay struct kfd_dev *kfd; 633050091abSYong Zhao const struct kfd_device_info *device_info; 634e392c887SYong Zhao const struct kfd2kgd_calls *f2g; 6355b983db8SAlex Deucher struct pci_dev *pdev = adev->pdev; 636050091abSYong Zhao 637c868d584SAlex Deucher switch (adev->asic_type) { 638c868d584SAlex Deucher #ifdef KFD_SUPPORT_IOMMU_V2 639c868d584SAlex Deucher #ifdef CONFIG_DRM_AMDGPU_CIK 640c868d584SAlex Deucher case CHIP_KAVERI: 641c868d584SAlex Deucher if (vf) 642c868d584SAlex Deucher device_info = NULL; 643c868d584SAlex Deucher else 644c868d584SAlex Deucher device_info = &kaveri_device_info; 645c868d584SAlex Deucher f2g = &gfx_v7_kfd2kgd; 646c868d584SAlex Deucher break; 647c868d584SAlex Deucher #endif 648c868d584SAlex Deucher case CHIP_CARRIZO: 649c868d584SAlex Deucher if (vf) 650c868d584SAlex Deucher device_info = NULL; 651c868d584SAlex Deucher else 652c868d584SAlex Deucher device_info = &carrizo_device_info; 653c868d584SAlex Deucher f2g = &gfx_v8_kfd2kgd; 654c868d584SAlex Deucher break; 655c868d584SAlex Deucher #endif 656c868d584SAlex Deucher #ifdef CONFIG_DRM_AMDGPU_CIK 657c868d584SAlex Deucher case CHIP_HAWAII: 658c868d584SAlex Deucher if (vf) 659c868d584SAlex Deucher device_info = NULL; 660c868d584SAlex Deucher else 661c868d584SAlex Deucher device_info = &hawaii_device_info; 662c868d584SAlex Deucher f2g = &gfx_v7_kfd2kgd; 663c868d584SAlex Deucher break; 664c868d584SAlex Deucher #endif 665c868d584SAlex Deucher case CHIP_TONGA: 666c868d584SAlex Deucher if (vf) 667c868d584SAlex Deucher device_info = NULL; 668c868d584SAlex Deucher else 669c868d584SAlex Deucher device_info = &tonga_device_info; 670c868d584SAlex Deucher f2g = &gfx_v8_kfd2kgd; 671c868d584SAlex Deucher break; 672c868d584SAlex Deucher case CHIP_FIJI: 673c868d584SAlex Deucher if (vf) 674c868d584SAlex Deucher device_info = &fiji_vf_device_info; 675c868d584SAlex Deucher else 676c868d584SAlex Deucher device_info = &fiji_device_info; 677c868d584SAlex Deucher f2g = &gfx_v8_kfd2kgd; 678c868d584SAlex Deucher break; 679c868d584SAlex Deucher case CHIP_POLARIS10: 680c868d584SAlex Deucher if (vf) 681c868d584SAlex Deucher device_info = &polaris10_vf_device_info; 682c868d584SAlex Deucher else 683c868d584SAlex Deucher device_info = &polaris10_device_info; 684c868d584SAlex Deucher f2g = &gfx_v8_kfd2kgd; 685c868d584SAlex Deucher break; 686c868d584SAlex Deucher case CHIP_POLARIS11: 687c868d584SAlex Deucher if (vf) 688c868d584SAlex Deucher device_info = NULL; 689c868d584SAlex Deucher else 690c868d584SAlex Deucher device_info = &polaris11_device_info; 691c868d584SAlex Deucher f2g = &gfx_v8_kfd2kgd; 692c868d584SAlex Deucher break; 693c868d584SAlex Deucher case CHIP_POLARIS12: 694c868d584SAlex Deucher if (vf) 695c868d584SAlex Deucher device_info = NULL; 696c868d584SAlex Deucher else 697c868d584SAlex Deucher device_info = &polaris12_device_info; 698c868d584SAlex Deucher f2g = &gfx_v8_kfd2kgd; 699c868d584SAlex Deucher break; 700c868d584SAlex Deucher case CHIP_VEGAM: 701c868d584SAlex Deucher if (vf) 702c868d584SAlex Deucher device_info = NULL; 703c868d584SAlex Deucher else 704c868d584SAlex Deucher device_info = &vegam_device_info; 705c868d584SAlex Deucher f2g = &gfx_v8_kfd2kgd; 706c868d584SAlex Deucher break; 707c868d584SAlex Deucher default: 708c868d584SAlex Deucher switch (adev->ip_versions[GC_HWIP][0]) { 709c868d584SAlex Deucher case IP_VERSION(9, 0, 1): 710c868d584SAlex Deucher if (vf) 711c868d584SAlex Deucher device_info = &vega10_vf_device_info; 712c868d584SAlex Deucher else 713c868d584SAlex Deucher device_info = &vega10_device_info; 714c868d584SAlex Deucher f2g = &gfx_v9_kfd2kgd; 715c868d584SAlex Deucher break; 716c868d584SAlex Deucher #ifdef KFD_SUPPORT_IOMMU_V2 717c868d584SAlex Deucher case IP_VERSION(9, 1, 0): 718c868d584SAlex Deucher case IP_VERSION(9, 2, 2): 719c868d584SAlex Deucher if (vf) 720c868d584SAlex Deucher device_info = NULL; 721c868d584SAlex Deucher else 722c868d584SAlex Deucher device_info = &raven_device_info; 723c868d584SAlex Deucher f2g = &gfx_v9_kfd2kgd; 724c868d584SAlex Deucher break; 725c868d584SAlex Deucher #endif 726c868d584SAlex Deucher case IP_VERSION(9, 2, 1): 727c868d584SAlex Deucher if (vf) 728c868d584SAlex Deucher device_info = NULL; 729c868d584SAlex Deucher else 730c868d584SAlex Deucher device_info = &vega12_device_info; 731c868d584SAlex Deucher f2g = &gfx_v9_kfd2kgd; 732c868d584SAlex Deucher break; 733c868d584SAlex Deucher case IP_VERSION(9, 3, 0): 734c868d584SAlex Deucher if (vf) 735c868d584SAlex Deucher device_info = NULL; 736c868d584SAlex Deucher else 737c868d584SAlex Deucher device_info = &renoir_device_info; 738c868d584SAlex Deucher f2g = &gfx_v9_kfd2kgd; 739c868d584SAlex Deucher break; 740c868d584SAlex Deucher case IP_VERSION(9, 4, 0): 741c868d584SAlex Deucher if (vf) 742c868d584SAlex Deucher device_info = NULL; 743c868d584SAlex Deucher else 744c868d584SAlex Deucher device_info = &vega20_device_info; 745c868d584SAlex Deucher f2g = &gfx_v9_kfd2kgd; 746c868d584SAlex Deucher break; 747c868d584SAlex Deucher case IP_VERSION(9, 4, 1): 748c868d584SAlex Deucher device_info = &arcturus_device_info; 749c868d584SAlex Deucher f2g = &arcturus_kfd2kgd; 750c868d584SAlex Deucher break; 751c868d584SAlex Deucher case IP_VERSION(9, 4, 2): 752c868d584SAlex Deucher device_info = &aldebaran_device_info; 753c868d584SAlex Deucher f2g = &aldebaran_kfd2kgd; 754c868d584SAlex Deucher break; 755c868d584SAlex Deucher case IP_VERSION(10, 1, 10): 756c868d584SAlex Deucher if (vf) 757c868d584SAlex Deucher device_info = NULL; 758c868d584SAlex Deucher else 759c868d584SAlex Deucher device_info = &navi10_device_info; 760c868d584SAlex Deucher f2g = &gfx_v10_kfd2kgd; 761c868d584SAlex Deucher break; 762c868d584SAlex Deucher case IP_VERSION(10, 1, 2): 763c868d584SAlex Deucher device_info = &navi12_device_info; 764c868d584SAlex Deucher f2g = &gfx_v10_kfd2kgd; 765c868d584SAlex Deucher break; 766c868d584SAlex Deucher case IP_VERSION(10, 1, 1): 767c868d584SAlex Deucher if (vf) 768c868d584SAlex Deucher device_info = NULL; 769c868d584SAlex Deucher else 770c868d584SAlex Deucher device_info = &navi14_device_info; 771c868d584SAlex Deucher f2g = &gfx_v10_kfd2kgd; 772c868d584SAlex Deucher break; 773c868d584SAlex Deucher case IP_VERSION(10, 1, 3): 774c868d584SAlex Deucher if (vf) 775c868d584SAlex Deucher device_info = NULL; 776c868d584SAlex Deucher else 777c868d584SAlex Deucher device_info = &cyan_skillfish_device_info; 778c868d584SAlex Deucher f2g = &gfx_v10_kfd2kgd; 779c868d584SAlex Deucher break; 780c868d584SAlex Deucher case IP_VERSION(10, 3, 0): 781c868d584SAlex Deucher device_info = &sienna_cichlid_device_info; 782c868d584SAlex Deucher f2g = &gfx_v10_3_kfd2kgd; 783c868d584SAlex Deucher break; 784c868d584SAlex Deucher case IP_VERSION(10, 3, 2): 785c868d584SAlex Deucher device_info = &navy_flounder_device_info; 786c868d584SAlex Deucher f2g = &gfx_v10_3_kfd2kgd; 787c868d584SAlex Deucher break; 788c868d584SAlex Deucher case IP_VERSION(10, 3, 1): 789c868d584SAlex Deucher if (vf) 790c868d584SAlex Deucher device_info = NULL; 791c868d584SAlex Deucher else 792c868d584SAlex Deucher device_info = &vangogh_device_info; 793c868d584SAlex Deucher f2g = &gfx_v10_3_kfd2kgd; 794c868d584SAlex Deucher break; 795c868d584SAlex Deucher case IP_VERSION(10, 3, 4): 796c868d584SAlex Deucher device_info = &dimgrey_cavefish_device_info; 797c868d584SAlex Deucher f2g = &gfx_v10_3_kfd2kgd; 798c868d584SAlex Deucher break; 799c868d584SAlex Deucher case IP_VERSION(10, 3, 5): 800c868d584SAlex Deucher device_info = &beige_goby_device_info; 801c868d584SAlex Deucher f2g = &gfx_v10_3_kfd2kgd; 802c868d584SAlex Deucher break; 803c868d584SAlex Deucher case IP_VERSION(10, 3, 3): 804c868d584SAlex Deucher if (vf) 805c868d584SAlex Deucher device_info = NULL; 806c868d584SAlex Deucher else 807c868d584SAlex Deucher device_info = &yellow_carp_device_info; 808c868d584SAlex Deucher f2g = &gfx_v10_3_kfd2kgd; 809c868d584SAlex Deucher break; 810c868d584SAlex Deucher default: 811c868d584SAlex Deucher return NULL; 812050091abSYong Zhao } 813c868d584SAlex Deucher break; 814c868d584SAlex Deucher } 8154a488a7aSOded Gabbay 816aa5e899dSDan Carpenter if (!device_info || !f2g) { 817*e4804a39SGraham Sider if (adev->ip_versions[GC_HWIP][0]) 818*e4804a39SGraham Sider dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n", 819*e4804a39SGraham Sider adev->ip_versions[GC_HWIP][0], vf ? "VF" : ""); 820*e4804a39SGraham Sider else 821050091abSYong Zhao dev_err(kfd_device, "%s %s not supported in kfd\n", 822c868d584SAlex Deucher amdgpu_asic_name[adev->asic_type], vf ? "VF" : ""); 8234a488a7aSOded Gabbay return NULL; 8244ebc7182SYong Zhao } 8254a488a7aSOded Gabbay 826d35f00d8SEric Huang kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); 827d35f00d8SEric Huang if (!kfd) 828d35f00d8SEric Huang return NULL; 829d35f00d8SEric Huang 830c6c57446SGraham Sider kfd->adev = adev; 8314a488a7aSOded Gabbay kfd->device_info = device_info; 8324a488a7aSOded Gabbay kfd->pdev = pdev; 83319f6d2a6SOded Gabbay kfd->init_complete = false; 834cea405b1SXihan Zhang kfd->kfd2kgd = f2g; 83543d8107fSHarish Kasiviswanathan atomic_set(&kfd->compute_profile, 0); 836cea405b1SXihan Zhang 837cea405b1SXihan Zhang mutex_init(&kfd->doorbell_mutex); 838cea405b1SXihan Zhang memset(&kfd->doorbell_available_index, 0, 839cea405b1SXihan Zhang sizeof(kfd->doorbell_available_index)); 8404a488a7aSOded Gabbay 8419b54d201SEric Huang atomic_set(&kfd->sram_ecc_flag, 0); 8429b54d201SEric Huang 84359d7115dSMukul Joshi ida_init(&kfd->doorbell_ida); 84459d7115dSMukul Joshi 8454a488a7aSOded Gabbay return kfd; 8464a488a7aSOded Gabbay } 8474a488a7aSOded Gabbay 848373d7080SFelix Kuehling static void kfd_cwsr_init(struct kfd_dev *kfd) 849373d7080SFelix Kuehling { 850373d7080SFelix Kuehling if (cwsr_enable && kfd->device_info->supports_cwsr) { 8513e76c239SFelix Kuehling if (kfd->device_info->asic_family < CHIP_VEGA10) { 852373d7080SFelix Kuehling BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); 853373d7080SFelix Kuehling kfd->cwsr_isa = cwsr_trap_gfx8_hex; 854373d7080SFelix Kuehling kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); 8550ef6845cSJay Cornwall } else if (kfd->device_info->asic_family == CHIP_ARCTURUS) { 8563baa24f0SOak Zeng BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE); 8573baa24f0SOak Zeng kfd->cwsr_isa = cwsr_trap_arcturus_hex; 8583baa24f0SOak Zeng kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex); 8590ef6845cSJay Cornwall } else if (kfd->device_info->asic_family == CHIP_ALDEBARAN) { 8600ef6845cSJay Cornwall BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE); 8610ef6845cSJay Cornwall kfd->cwsr_isa = cwsr_trap_aldebaran_hex; 8620ef6845cSJay Cornwall kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex); 86314328aa5SPhilip Cox } else if (kfd->device_info->asic_family < CHIP_NAVI10) { 8643e76c239SFelix Kuehling BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); 8653e76c239SFelix Kuehling kfd->cwsr_isa = cwsr_trap_gfx9_hex; 8663e76c239SFelix Kuehling kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); 86780b6cfedSJay Cornwall } else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) { 86880b6cfedSJay Cornwall BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE); 86980b6cfedSJay Cornwall kfd->cwsr_isa = cwsr_trap_nv1x_hex; 87080b6cfedSJay Cornwall kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex); 87114328aa5SPhilip Cox } else { 87214328aa5SPhilip Cox BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE); 87314328aa5SPhilip Cox kfd->cwsr_isa = cwsr_trap_gfx10_hex; 87414328aa5SPhilip Cox kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex); 8753e76c239SFelix Kuehling } 8763e76c239SFelix Kuehling 877373d7080SFelix Kuehling kfd->cwsr_enabled = true; 878373d7080SFelix Kuehling } 879373d7080SFelix Kuehling } 880373d7080SFelix Kuehling 88129633d0eSJoseph Greathouse static int kfd_gws_init(struct kfd_dev *kfd) 88229633d0eSJoseph Greathouse { 88329633d0eSJoseph Greathouse int ret = 0; 88429633d0eSJoseph Greathouse 88529633d0eSJoseph Greathouse if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) 88629633d0eSJoseph Greathouse return 0; 88729633d0eSJoseph Greathouse 88829633d0eSJoseph Greathouse if (hws_gws_support 889fea7d919SJoseph Greathouse || (kfd->device_info->asic_family == CHIP_VEGA10 890fea7d919SJoseph Greathouse && kfd->mec2_fw_version >= 0x81b3) 891fea7d919SJoseph Greathouse || (kfd->device_info->asic_family >= CHIP_VEGA12 89229633d0eSJoseph Greathouse && kfd->device_info->asic_family <= CHIP_RAVEN 893fea7d919SJoseph Greathouse && kfd->mec2_fw_version >= 0x1b3) 894fea7d919SJoseph Greathouse || (kfd->device_info->asic_family == CHIP_ARCTURUS 8958baa6018SHarish Kasiviswanathan && kfd->mec2_fw_version >= 0x30) 8968baa6018SHarish Kasiviswanathan || (kfd->device_info->asic_family == CHIP_ALDEBARAN 8978baa6018SHarish Kasiviswanathan && kfd->mec2_fw_version >= 0x28)) 8986bfc7c7eSGraham Sider ret = amdgpu_amdkfd_alloc_gws(kfd->adev, 89902274fc0SGraham Sider kfd->adev->gds.gws_size, &kfd->gws); 90029633d0eSJoseph Greathouse 90129633d0eSJoseph Greathouse return ret; 90229633d0eSJoseph Greathouse } 90329633d0eSJoseph Greathouse 904938a0650SAmber Lin static void kfd_smi_init(struct kfd_dev *dev) { 905938a0650SAmber Lin INIT_LIST_HEAD(&dev->smi_clients); 906938a0650SAmber Lin spin_lock_init(&dev->smi_lock); 907938a0650SAmber Lin } 908938a0650SAmber Lin 9094a488a7aSOded Gabbay bool kgd2kfd_device_init(struct kfd_dev *kfd, 9103a0c3423SHarish Kasiviswanathan struct drm_device *ddev, 9114a488a7aSOded Gabbay const struct kgd2kfd_shared_resources *gpu_resources) 9124a488a7aSOded Gabbay { 913fd6a440eSJonathan Kim unsigned int size, map_process_packet_size; 91419f6d2a6SOded Gabbay 9153a0c3423SHarish Kasiviswanathan kfd->ddev = ddev; 916574c4183SGraham Sider kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, 9175ade6c9cSFelix Kuehling KGD_ENGINE_MEC1); 918574c4183SGraham Sider kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, 91929633d0eSJoseph Greathouse KGD_ENGINE_MEC2); 920574c4183SGraham Sider kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, 9215ade6c9cSFelix Kuehling KGD_ENGINE_SDMA1); 9224a488a7aSOded Gabbay kfd->shared_resources = *gpu_resources; 9234a488a7aSOded Gabbay 92444008d7aSYong Zhao kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; 92544008d7aSYong Zhao kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; 92644008d7aSYong Zhao kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd 92744008d7aSYong Zhao - kfd->vm_info.first_vmid_kfd + 1; 92844008d7aSYong Zhao 929e312af6cSFelix Kuehling /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. 930e312af6cSFelix Kuehling * 32 and 64-bit requests are possible and must be 931e312af6cSFelix Kuehling * supported. 932e312af6cSFelix Kuehling */ 9336bfc7c7eSGraham Sider kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev); 934e312af6cSFelix Kuehling if (!kfd->pci_atomic_requested && 935e312af6cSFelix Kuehling kfd->device_info->needs_pci_atomics && 936e312af6cSFelix Kuehling (!kfd->device_info->no_atomic_fw_version || 937e312af6cSFelix Kuehling kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) { 938e312af6cSFelix Kuehling dev_info(kfd_device, 939e312af6cSFelix Kuehling "skipped device %x:%x, PCI rejects atomics %d<%d\n", 940e312af6cSFelix Kuehling kfd->pdev->vendor, kfd->pdev->device, 941e312af6cSFelix Kuehling kfd->mec_fw_version, 942e312af6cSFelix Kuehling kfd->device_info->no_atomic_fw_version); 943e312af6cSFelix Kuehling return false; 944e312af6cSFelix Kuehling } 945e312af6cSFelix Kuehling 946a99c6d4fSFelix Kuehling /* Verify module parameters regarding mapped process number*/ 947a99c6d4fSFelix Kuehling if ((hws_max_conc_proc < 0) 948a99c6d4fSFelix Kuehling || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { 949a99c6d4fSFelix Kuehling dev_err(kfd_device, 950a99c6d4fSFelix Kuehling "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n", 951a99c6d4fSFelix Kuehling hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, 952a99c6d4fSFelix Kuehling kfd->vm_info.vmid_num_kfd); 953a99c6d4fSFelix Kuehling kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; 954a99c6d4fSFelix Kuehling } else 955a99c6d4fSFelix Kuehling kfd->max_proc_per_quantum = hws_max_conc_proc; 956a99c6d4fSFelix Kuehling 95719f6d2a6SOded Gabbay /* calculate max size of mqds needed for queues */ 958b8cbab04SOded Gabbay size = max_num_of_queues_per_device * 95919f6d2a6SOded Gabbay kfd->device_info->mqd_size_aligned; 96019f6d2a6SOded Gabbay 961e18e794eSOded Gabbay /* 962e18e794eSOded Gabbay * calculate max size of runlist packet. 963e18e794eSOded Gabbay * There can be only 2 packets at once 964e18e794eSOded Gabbay */ 965fd6a440eSJonathan Kim map_process_packet_size = 966fd6a440eSJonathan Kim kfd->device_info->asic_family == CHIP_ALDEBARAN ? 967fd6a440eSJonathan Kim sizeof(struct pm4_mes_map_process_aldebaran) : 968fd6a440eSJonathan Kim sizeof(struct pm4_mes_map_process); 969fd6a440eSJonathan Kim size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size + 970507968ddSFelix Kuehling max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues) 971507968ddSFelix Kuehling + sizeof(struct pm4_mes_runlist)) * 2; 972e18e794eSOded Gabbay 973e18e794eSOded Gabbay /* Add size of HIQ & DIQ */ 974e18e794eSOded Gabbay size += KFD_KERNEL_QUEUE_SIZE * 2; 975e18e794eSOded Gabbay 976e18e794eSOded Gabbay /* add another 512KB for all other allocations on gart (HPD, fences) */ 97719f6d2a6SOded Gabbay size += 512 * 1024; 97819f6d2a6SOded Gabbay 9797cd52c91SAmber Lin if (amdgpu_amdkfd_alloc_gtt_mem( 9806bfc7c7eSGraham Sider kfd->adev, size, &kfd->gtt_mem, 98115426dbbSYong Zhao &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, 98215426dbbSYong Zhao false)) { 98379775b62SKent Russell dev_err(kfd_device, "Could not allocate %d bytes\n", size); 984e09d4fc8SOak Zeng goto alloc_gtt_mem_failure; 98519f6d2a6SOded Gabbay } 98619f6d2a6SOded Gabbay 98779775b62SKent Russell dev_info(kfd_device, "Allocated %d bytes on gart\n", size); 988e18e794eSOded Gabbay 98973a1da0bSOded Gabbay /* Initialize GTT sa with 512 byte chunk size */ 99073a1da0bSOded Gabbay if (kfd_gtt_sa_init(kfd, size, 512) != 0) { 99179775b62SKent Russell dev_err(kfd_device, "Error initializing gtt sub-allocator\n"); 99273a1da0bSOded Gabbay goto kfd_gtt_sa_init_error; 99373a1da0bSOded Gabbay } 99473a1da0bSOded Gabbay 995735df2baSFelix Kuehling if (kfd_doorbell_init(kfd)) { 996735df2baSFelix Kuehling dev_err(kfd_device, 997735df2baSFelix Kuehling "Error initializing doorbell aperture\n"); 998735df2baSFelix Kuehling goto kfd_doorbell_error; 999735df2baSFelix Kuehling } 100019f6d2a6SOded Gabbay 100102274fc0SGraham Sider kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; 10020c1690e3SShaoyun Liu 100302274fc0SGraham Sider kfd->noretry = kfd->adev->gmc.noretry; 10049b498efaSAlex Deucher 10052249d558SAndrew Lewycky if (kfd_interrupt_init(kfd)) { 100679775b62SKent Russell dev_err(kfd_device, "Error initializing interrupts\n"); 10072249d558SAndrew Lewycky goto kfd_interrupt_error; 10082249d558SAndrew Lewycky } 10092249d558SAndrew Lewycky 101064c7f8cfSBen Goz kfd->dqm = device_queue_manager_init(kfd); 101164c7f8cfSBen Goz if (!kfd->dqm) { 101279775b62SKent Russell dev_err(kfd_device, "Error initializing queue manager\n"); 101364c7f8cfSBen Goz goto device_queue_manager_error; 101464c7f8cfSBen Goz } 101564c7f8cfSBen Goz 101629633d0eSJoseph Greathouse /* If supported on this device, allocate global GWS that is shared 101729633d0eSJoseph Greathouse * by all KFD processes 101829633d0eSJoseph Greathouse */ 101929633d0eSJoseph Greathouse if (kfd_gws_init(kfd)) { 102029633d0eSJoseph Greathouse dev_err(kfd_device, "Could not allocate %d gws\n", 102102274fc0SGraham Sider kfd->adev->gds.gws_size); 102229633d0eSJoseph Greathouse goto gws_error; 102329633d0eSJoseph Greathouse } 102429633d0eSJoseph Greathouse 10256127896fSHuang Rui /* If CRAT is broken, won't set iommu enabled */ 10266127896fSHuang Rui kfd_double_confirm_iommu_support(kfd); 10276127896fSHuang Rui 102864d1c3a4SFelix Kuehling if (kfd_iommu_device_init(kfd)) { 10296f4b590aSYifan Zhang kfd->use_iommu_v2 = false; 103064d1c3a4SFelix Kuehling dev_err(kfd_device, "Error initializing iommuv2\n"); 103164d1c3a4SFelix Kuehling goto device_iommu_error; 103264c7f8cfSBen Goz } 103364c7f8cfSBen Goz 1034373d7080SFelix Kuehling kfd_cwsr_init(kfd); 1035373d7080SFelix Kuehling 103656c5977eSGraham Sider svm_migrate_init(kfd->adev); 1037814ab993SPhilip Yang 1038afd18180SYifan Zhang if(kgd2kfd_resume_iommu(kfd)) 1039afd18180SYifan Zhang goto device_iommu_error; 1040afd18180SYifan Zhang 1041b8935a7cSYong Zhao if (kfd_resume(kfd)) 1042b8935a7cSYong Zhao goto kfd_resume_error; 1043b8935a7cSYong Zhao 1044fbeb661bSYair Shachar kfd->dbgmgr = NULL; 1045fbeb661bSYair Shachar 1046465ab9e0SOak Zeng if (kfd_topology_add_device(kfd)) { 1047465ab9e0SOak Zeng dev_err(kfd_device, "Error adding device to topology\n"); 1048465ab9e0SOak Zeng goto kfd_topology_add_device_error; 1049465ab9e0SOak Zeng } 1050465ab9e0SOak Zeng 1051938a0650SAmber Lin kfd_smi_init(kfd); 1052938a0650SAmber Lin 10534a488a7aSOded Gabbay kfd->init_complete = true; 105479775b62SKent Russell dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor, 10554a488a7aSOded Gabbay kfd->pdev->device); 10564a488a7aSOded Gabbay 105779775b62SKent Russell pr_debug("Starting kfd with the following scheduling policy %d\n", 1058d146c5a7SFelix Kuehling kfd->dqm->sched_policy); 105964c7f8cfSBen Goz 106019f6d2a6SOded Gabbay goto out; 106119f6d2a6SOded Gabbay 1062465ab9e0SOak Zeng kfd_topology_add_device_error: 1063b8935a7cSYong Zhao kfd_resume_error: 106464d1c3a4SFelix Kuehling device_iommu_error: 106529633d0eSJoseph Greathouse gws_error: 106664c7f8cfSBen Goz device_queue_manager_uninit(kfd->dqm); 106764c7f8cfSBen Goz device_queue_manager_error: 10682249d558SAndrew Lewycky kfd_interrupt_exit(kfd); 10692249d558SAndrew Lewycky kfd_interrupt_error: 1070735df2baSFelix Kuehling kfd_doorbell_fini(kfd); 1071735df2baSFelix Kuehling kfd_doorbell_error: 107273a1da0bSOded Gabbay kfd_gtt_sa_fini(kfd); 107373a1da0bSOded Gabbay kfd_gtt_sa_init_error: 10746bfc7c7eSGraham Sider amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); 1075e09d4fc8SOak Zeng alloc_gtt_mem_failure: 107629633d0eSJoseph Greathouse if (kfd->gws) 10776bfc7c7eSGraham Sider amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws); 107819f6d2a6SOded Gabbay dev_err(kfd_device, 107979775b62SKent Russell "device %x:%x NOT added due to errors\n", 108019f6d2a6SOded Gabbay kfd->pdev->vendor, kfd->pdev->device); 108119f6d2a6SOded Gabbay out: 108219f6d2a6SOded Gabbay return kfd->init_complete; 10834a488a7aSOded Gabbay } 10844a488a7aSOded Gabbay 10854a488a7aSOded Gabbay void kgd2kfd_device_exit(struct kfd_dev *kfd) 10864a488a7aSOded Gabbay { 1087b17f068aSOded Gabbay if (kfd->init_complete) { 108864c7f8cfSBen Goz device_queue_manager_uninit(kfd->dqm); 10892249d558SAndrew Lewycky kfd_interrupt_exit(kfd); 109019f6d2a6SOded Gabbay kfd_topology_remove_device(kfd); 1091735df2baSFelix Kuehling kfd_doorbell_fini(kfd); 109259d7115dSMukul Joshi ida_destroy(&kfd->doorbell_ida); 109373a1da0bSOded Gabbay kfd_gtt_sa_fini(kfd); 10946bfc7c7eSGraham Sider amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); 109529633d0eSJoseph Greathouse if (kfd->gws) 10966bfc7c7eSGraham Sider amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws); 1097b17f068aSOded Gabbay } 10985b5c4e40SEvgeny Pinchuk 10994a488a7aSOded Gabbay kfree(kfd); 11004a488a7aSOded Gabbay } 11014a488a7aSOded Gabbay 1102e3b7a967SShaoyun Liu int kgd2kfd_pre_reset(struct kfd_dev *kfd) 1103e3b7a967SShaoyun Liu { 1104e42051d2SShaoyun Liu if (!kfd->init_complete) 1105e42051d2SShaoyun Liu return 0; 110609c34e8dSFelix Kuehling 110755977744SMukul Joshi kfd_smi_event_update_gpu_reset(kfd, false); 110855977744SMukul Joshi 110909c34e8dSFelix Kuehling kfd->dqm->ops.pre_reset(kfd->dqm); 111009c34e8dSFelix Kuehling 11119593f4d6SRajneesh Bhardwaj kgd2kfd_suspend(kfd, false); 1112e42051d2SShaoyun Liu 1113e42051d2SShaoyun Liu kfd_signal_reset_event(kfd); 1114e3b7a967SShaoyun Liu return 0; 1115e3b7a967SShaoyun Liu } 1116e3b7a967SShaoyun Liu 1117e42051d2SShaoyun Liu /* 1118e42051d2SShaoyun Liu * Fix me. KFD won't be able to resume existing process for now. 1119e42051d2SShaoyun Liu * We will keep all existing process in a evicted state and 1120e42051d2SShaoyun Liu * wait the process to be terminated. 1121e42051d2SShaoyun Liu */ 1122e42051d2SShaoyun Liu 1123e3b7a967SShaoyun Liu int kgd2kfd_post_reset(struct kfd_dev *kfd) 1124e3b7a967SShaoyun Liu { 1125a1bd079fSyu kuai int ret; 1126e42051d2SShaoyun Liu 1127e42051d2SShaoyun Liu if (!kfd->init_complete) 1128e3b7a967SShaoyun Liu return 0; 1129e42051d2SShaoyun Liu 1130e42051d2SShaoyun Liu ret = kfd_resume(kfd); 1131e42051d2SShaoyun Liu if (ret) 1132e42051d2SShaoyun Liu return ret; 1133a1bd079fSyu kuai atomic_dec(&kfd_locked); 11349b54d201SEric Huang 11359b54d201SEric Huang atomic_set(&kfd->sram_ecc_flag, 0); 11369b54d201SEric Huang 113755977744SMukul Joshi kfd_smi_event_update_gpu_reset(kfd, true); 113855977744SMukul Joshi 1139e42051d2SShaoyun Liu return 0; 1140e42051d2SShaoyun Liu } 1141e42051d2SShaoyun Liu 1142e42051d2SShaoyun Liu bool kfd_is_locked(void) 1143e42051d2SShaoyun Liu { 1144e42051d2SShaoyun Liu return (atomic_read(&kfd_locked) > 0); 1145e3b7a967SShaoyun Liu } 1146e3b7a967SShaoyun Liu 11479593f4d6SRajneesh Bhardwaj void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) 11484a488a7aSOded Gabbay { 1149733fa1f7SYong Zhao if (!kfd->init_complete) 1150733fa1f7SYong Zhao return; 1151733fa1f7SYong Zhao 11529593f4d6SRajneesh Bhardwaj /* for runtime suspend, skip locking kfd */ 11539593f4d6SRajneesh Bhardwaj if (!run_pm) { 115426103436SFelix Kuehling /* For first KFD device suspend all the KFD processes */ 1155e42051d2SShaoyun Liu if (atomic_inc_return(&kfd_locked) == 1) 115626103436SFelix Kuehling kfd_suspend_all_processes(); 11579593f4d6SRajneesh Bhardwaj } 115826103436SFelix Kuehling 115945c9a5e4SOded Gabbay kfd->dqm->ops.stop(kfd->dqm); 116064d1c3a4SFelix Kuehling kfd_iommu_suspend(kfd); 11614a488a7aSOded Gabbay } 11624a488a7aSOded Gabbay 11639593f4d6SRajneesh Bhardwaj int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) 11644a488a7aSOded Gabbay { 116526103436SFelix Kuehling int ret, count; 116626103436SFelix Kuehling 1167b8935a7cSYong Zhao if (!kfd->init_complete) 1168b8935a7cSYong Zhao return 0; 1169b17f068aSOded Gabbay 117026103436SFelix Kuehling ret = kfd_resume(kfd); 117126103436SFelix Kuehling if (ret) 117226103436SFelix Kuehling return ret; 1173b17f068aSOded Gabbay 11749593f4d6SRajneesh Bhardwaj /* for runtime resume, skip unlocking kfd */ 11759593f4d6SRajneesh Bhardwaj if (!run_pm) { 1176e42051d2SShaoyun Liu count = atomic_dec_return(&kfd_locked); 117726103436SFelix Kuehling WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); 117826103436SFelix Kuehling if (count == 0) 117926103436SFelix Kuehling ret = kfd_resume_all_processes(); 11809593f4d6SRajneesh Bhardwaj } 118126103436SFelix Kuehling 118226103436SFelix Kuehling return ret; 11834ebc7182SYong Zhao } 11844ebc7182SYong Zhao 1185f8846323SJames Zhu int kgd2kfd_resume_iommu(struct kfd_dev *kfd) 1186b8935a7cSYong Zhao { 1187b8935a7cSYong Zhao int err = 0; 1188b8935a7cSYong Zhao 118964d1c3a4SFelix Kuehling err = kfd_iommu_resume(kfd); 1190f8846323SJames Zhu if (err) 119164d1c3a4SFelix Kuehling dev_err(kfd_device, 119264d1c3a4SFelix Kuehling "Failed to resume IOMMU for device %x:%x\n", 119364d1c3a4SFelix Kuehling kfd->pdev->vendor, kfd->pdev->device); 119464d1c3a4SFelix Kuehling return err; 119564d1c3a4SFelix Kuehling } 1196733fa1f7SYong Zhao 1197f8846323SJames Zhu static int kfd_resume(struct kfd_dev *kfd) 1198f8846323SJames Zhu { 1199f8846323SJames Zhu int err = 0; 1200f8846323SJames Zhu 1201b8935a7cSYong Zhao err = kfd->dqm->ops.start(kfd->dqm); 1202499f4d38SYifan Zhang if (err) 1203b8935a7cSYong Zhao dev_err(kfd_device, 1204b8935a7cSYong Zhao "Error starting queue manager for device %x:%x\n", 1205b8935a7cSYong Zhao kfd->pdev->vendor, kfd->pdev->device); 1206b17f068aSOded Gabbay 1207b8935a7cSYong Zhao return err; 12084a488a7aSOded Gabbay } 12094a488a7aSOded Gabbay 1210b3eca59dSPhilip Yang static inline void kfd_queue_work(struct workqueue_struct *wq, 1211b3eca59dSPhilip Yang struct work_struct *work) 1212b3eca59dSPhilip Yang { 1213b3eca59dSPhilip Yang int cpu, new_cpu; 1214b3eca59dSPhilip Yang 1215b3eca59dSPhilip Yang cpu = new_cpu = smp_processor_id(); 1216b3eca59dSPhilip Yang do { 1217b3eca59dSPhilip Yang new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids; 1218b3eca59dSPhilip Yang if (cpu_to_node(new_cpu) == numa_node_id()) 1219b3eca59dSPhilip Yang break; 1220b3eca59dSPhilip Yang } while (cpu != new_cpu); 1221b3eca59dSPhilip Yang 1222b3eca59dSPhilip Yang queue_work_on(new_cpu, wq, work); 1223b3eca59dSPhilip Yang } 1224b3eca59dSPhilip Yang 1225b3f5e6b4SAndrew Lewycky /* This is called directly from KGD at ISR. */ 1226b3f5e6b4SAndrew Lewycky void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) 12274a488a7aSOded Gabbay { 122858e69886SLan Xiao uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE]; 122958e69886SLan Xiao bool is_patched = false; 12302383a767SChristian König unsigned long flags; 123158e69886SLan Xiao 12322249d558SAndrew Lewycky if (!kfd->init_complete) 12332249d558SAndrew Lewycky return; 12342249d558SAndrew Lewycky 123558e69886SLan Xiao if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) { 123658e69886SLan Xiao dev_err_once(kfd_device, "Ring entry too small\n"); 123758e69886SLan Xiao return; 123858e69886SLan Xiao } 123958e69886SLan Xiao 12402383a767SChristian König spin_lock_irqsave(&kfd->interrupt_lock, flags); 12412249d558SAndrew Lewycky 12422249d558SAndrew Lewycky if (kfd->interrupts_active 124358e69886SLan Xiao && interrupt_is_wanted(kfd, ih_ring_entry, 124458e69886SLan Xiao patched_ihre, &is_patched) 124558e69886SLan Xiao && enqueue_ih_ring_entry(kfd, 124658e69886SLan Xiao is_patched ? patched_ihre : ih_ring_entry)) 1247b3eca59dSPhilip Yang kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work); 12482249d558SAndrew Lewycky 12492383a767SChristian König spin_unlock_irqrestore(&kfd->interrupt_lock, flags); 12504a488a7aSOded Gabbay } 12516e81090bSOded Gabbay 12526b95e797SFelix Kuehling int kgd2kfd_quiesce_mm(struct mm_struct *mm) 12536b95e797SFelix Kuehling { 12546b95e797SFelix Kuehling struct kfd_process *p; 12556b95e797SFelix Kuehling int r; 12566b95e797SFelix Kuehling 12576b95e797SFelix Kuehling /* Because we are called from arbitrary context (workqueue) as opposed 12586b95e797SFelix Kuehling * to process context, kfd_process could attempt to exit while we are 12596b95e797SFelix Kuehling * running so the lookup function increments the process ref count. 12606b95e797SFelix Kuehling */ 12616b95e797SFelix Kuehling p = kfd_lookup_process_by_mm(mm); 12626b95e797SFelix Kuehling if (!p) 12636b95e797SFelix Kuehling return -ESRCH; 12646b95e797SFelix Kuehling 1265b2057956SFelix Kuehling WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 12666b95e797SFelix Kuehling r = kfd_process_evict_queues(p); 12676b95e797SFelix Kuehling 12686b95e797SFelix Kuehling kfd_unref_process(p); 12696b95e797SFelix Kuehling return r; 12706b95e797SFelix Kuehling } 12716b95e797SFelix Kuehling 12726b95e797SFelix Kuehling int kgd2kfd_resume_mm(struct mm_struct *mm) 12736b95e797SFelix Kuehling { 12746b95e797SFelix Kuehling struct kfd_process *p; 12756b95e797SFelix Kuehling int r; 12766b95e797SFelix Kuehling 12776b95e797SFelix Kuehling /* Because we are called from arbitrary context (workqueue) as opposed 12786b95e797SFelix Kuehling * to process context, kfd_process could attempt to exit while we are 12796b95e797SFelix Kuehling * running so the lookup function increments the process ref count. 12806b95e797SFelix Kuehling */ 12816b95e797SFelix Kuehling p = kfd_lookup_process_by_mm(mm); 12826b95e797SFelix Kuehling if (!p) 12836b95e797SFelix Kuehling return -ESRCH; 12846b95e797SFelix Kuehling 12856b95e797SFelix Kuehling r = kfd_process_restore_queues(p); 12866b95e797SFelix Kuehling 12876b95e797SFelix Kuehling kfd_unref_process(p); 12886b95e797SFelix Kuehling return r; 12896b95e797SFelix Kuehling } 12906b95e797SFelix Kuehling 129126103436SFelix Kuehling /** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will 129226103436SFelix Kuehling * prepare for safe eviction of KFD BOs that belong to the specified 129326103436SFelix Kuehling * process. 129426103436SFelix Kuehling * 129526103436SFelix Kuehling * @mm: mm_struct that identifies the specified KFD process 129626103436SFelix Kuehling * @fence: eviction fence attached to KFD process BOs 129726103436SFelix Kuehling * 129826103436SFelix Kuehling */ 129926103436SFelix Kuehling int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, 130026103436SFelix Kuehling struct dma_fence *fence) 130126103436SFelix Kuehling { 130226103436SFelix Kuehling struct kfd_process *p; 130326103436SFelix Kuehling unsigned long active_time; 130426103436SFelix Kuehling unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS); 130526103436SFelix Kuehling 130626103436SFelix Kuehling if (!fence) 130726103436SFelix Kuehling return -EINVAL; 130826103436SFelix Kuehling 130926103436SFelix Kuehling if (dma_fence_is_signaled(fence)) 131026103436SFelix Kuehling return 0; 131126103436SFelix Kuehling 131226103436SFelix Kuehling p = kfd_lookup_process_by_mm(mm); 131326103436SFelix Kuehling if (!p) 131426103436SFelix Kuehling return -ENODEV; 131526103436SFelix Kuehling 131626103436SFelix Kuehling if (fence->seqno == p->last_eviction_seqno) 131726103436SFelix Kuehling goto out; 131826103436SFelix Kuehling 131926103436SFelix Kuehling p->last_eviction_seqno = fence->seqno; 132026103436SFelix Kuehling 132126103436SFelix Kuehling /* Avoid KFD process starvation. Wait for at least 132226103436SFelix Kuehling * PROCESS_ACTIVE_TIME_MS before evicting the process again 132326103436SFelix Kuehling */ 132426103436SFelix Kuehling active_time = get_jiffies_64() - p->last_restore_timestamp; 132526103436SFelix Kuehling if (delay_jiffies > active_time) 132626103436SFelix Kuehling delay_jiffies -= active_time; 132726103436SFelix Kuehling else 132826103436SFelix Kuehling delay_jiffies = 0; 132926103436SFelix Kuehling 133026103436SFelix Kuehling /* During process initialization eviction_work.dwork is initialized 133126103436SFelix Kuehling * to kfd_evict_bo_worker 133226103436SFelix Kuehling */ 1333b2057956SFelix Kuehling WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies", 1334b2057956SFelix Kuehling p->lead_thread->pid, delay_jiffies); 133526103436SFelix Kuehling schedule_delayed_work(&p->eviction_work, delay_jiffies); 133626103436SFelix Kuehling out: 133726103436SFelix Kuehling kfd_unref_process(p); 133826103436SFelix Kuehling return 0; 133926103436SFelix Kuehling } 134026103436SFelix Kuehling 13416e81090bSOded Gabbay static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, 13426e81090bSOded Gabbay unsigned int chunk_size) 13436e81090bSOded Gabbay { 13448625ff9cSFelix Kuehling unsigned int num_of_longs; 13456e81090bSOded Gabbay 134632fa8219SFelix Kuehling if (WARN_ON(buf_size < chunk_size)) 134732fa8219SFelix Kuehling return -EINVAL; 134832fa8219SFelix Kuehling if (WARN_ON(buf_size == 0)) 134932fa8219SFelix Kuehling return -EINVAL; 135032fa8219SFelix Kuehling if (WARN_ON(chunk_size == 0)) 135132fa8219SFelix Kuehling return -EINVAL; 13526e81090bSOded Gabbay 13536e81090bSOded Gabbay kfd->gtt_sa_chunk_size = chunk_size; 13546e81090bSOded Gabbay kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; 13556e81090bSOded Gabbay 13568625ff9cSFelix Kuehling num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) / 13578625ff9cSFelix Kuehling BITS_PER_LONG; 13586e81090bSOded Gabbay 13598625ff9cSFelix Kuehling kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL); 13606e81090bSOded Gabbay 13616e81090bSOded Gabbay if (!kfd->gtt_sa_bitmap) 13626e81090bSOded Gabbay return -ENOMEM; 13636e81090bSOded Gabbay 136479775b62SKent Russell pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n", 13656e81090bSOded Gabbay kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap); 13666e81090bSOded Gabbay 13676e81090bSOded Gabbay mutex_init(&kfd->gtt_sa_lock); 13686e81090bSOded Gabbay 13696e81090bSOded Gabbay return 0; 13706e81090bSOded Gabbay 13716e81090bSOded Gabbay } 13726e81090bSOded Gabbay 13736e81090bSOded Gabbay static void kfd_gtt_sa_fini(struct kfd_dev *kfd) 13746e81090bSOded Gabbay { 13756e81090bSOded Gabbay mutex_destroy(&kfd->gtt_sa_lock); 13766e81090bSOded Gabbay kfree(kfd->gtt_sa_bitmap); 13776e81090bSOded Gabbay } 13786e81090bSOded Gabbay 13796e81090bSOded Gabbay static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr, 13806e81090bSOded Gabbay unsigned int bit_num, 13816e81090bSOded Gabbay unsigned int chunk_size) 13826e81090bSOded Gabbay { 13836e81090bSOded Gabbay return start_addr + bit_num * chunk_size; 13846e81090bSOded Gabbay } 13856e81090bSOded Gabbay 13866e81090bSOded Gabbay static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr, 13876e81090bSOded Gabbay unsigned int bit_num, 13886e81090bSOded Gabbay unsigned int chunk_size) 13896e81090bSOded Gabbay { 13906e81090bSOded Gabbay return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size); 13916e81090bSOded Gabbay } 13926e81090bSOded Gabbay 13936e81090bSOded Gabbay int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, 13946e81090bSOded Gabbay struct kfd_mem_obj **mem_obj) 13956e81090bSOded Gabbay { 13966e81090bSOded Gabbay unsigned int found, start_search, cur_size; 13976e81090bSOded Gabbay 13986e81090bSOded Gabbay if (size == 0) 13996e81090bSOded Gabbay return -EINVAL; 14006e81090bSOded Gabbay 14016e81090bSOded Gabbay if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) 14026e81090bSOded Gabbay return -ENOMEM; 14036e81090bSOded Gabbay 14041cd106ecSFelix Kuehling *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); 14051cd106ecSFelix Kuehling if (!(*mem_obj)) 14066e81090bSOded Gabbay return -ENOMEM; 14076e81090bSOded Gabbay 140879775b62SKent Russell pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size); 14096e81090bSOded Gabbay 14106e81090bSOded Gabbay start_search = 0; 14116e81090bSOded Gabbay 14126e81090bSOded Gabbay mutex_lock(&kfd->gtt_sa_lock); 14136e81090bSOded Gabbay 14146e81090bSOded Gabbay kfd_gtt_restart_search: 14156e81090bSOded Gabbay /* Find the first chunk that is free */ 14166e81090bSOded Gabbay found = find_next_zero_bit(kfd->gtt_sa_bitmap, 14176e81090bSOded Gabbay kfd->gtt_sa_num_of_chunks, 14186e81090bSOded Gabbay start_search); 14196e81090bSOded Gabbay 142079775b62SKent Russell pr_debug("Found = %d\n", found); 14216e81090bSOded Gabbay 14226e81090bSOded Gabbay /* If there wasn't any free chunk, bail out */ 14236e81090bSOded Gabbay if (found == kfd->gtt_sa_num_of_chunks) 14246e81090bSOded Gabbay goto kfd_gtt_no_free_chunk; 14256e81090bSOded Gabbay 14266e81090bSOded Gabbay /* Update fields of mem_obj */ 14276e81090bSOded Gabbay (*mem_obj)->range_start = found; 14286e81090bSOded Gabbay (*mem_obj)->range_end = found; 14296e81090bSOded Gabbay (*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr( 14306e81090bSOded Gabbay kfd->gtt_start_gpu_addr, 14316e81090bSOded Gabbay found, 14326e81090bSOded Gabbay kfd->gtt_sa_chunk_size); 14336e81090bSOded Gabbay (*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr( 14346e81090bSOded Gabbay kfd->gtt_start_cpu_ptr, 14356e81090bSOded Gabbay found, 14366e81090bSOded Gabbay kfd->gtt_sa_chunk_size); 14376e81090bSOded Gabbay 143879775b62SKent Russell pr_debug("gpu_addr = %p, cpu_addr = %p\n", 14396e81090bSOded Gabbay (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr); 14406e81090bSOded Gabbay 14416e81090bSOded Gabbay /* If we need only one chunk, mark it as allocated and get out */ 14426e81090bSOded Gabbay if (size <= kfd->gtt_sa_chunk_size) { 144379775b62SKent Russell pr_debug("Single bit\n"); 14446e81090bSOded Gabbay set_bit(found, kfd->gtt_sa_bitmap); 14456e81090bSOded Gabbay goto kfd_gtt_out; 14466e81090bSOded Gabbay } 14476e81090bSOded Gabbay 14486e81090bSOded Gabbay /* Otherwise, try to see if we have enough contiguous chunks */ 14496e81090bSOded Gabbay cur_size = size - kfd->gtt_sa_chunk_size; 14506e81090bSOded Gabbay do { 14516e81090bSOded Gabbay (*mem_obj)->range_end = 14526e81090bSOded Gabbay find_next_zero_bit(kfd->gtt_sa_bitmap, 14536e81090bSOded Gabbay kfd->gtt_sa_num_of_chunks, ++found); 14546e81090bSOded Gabbay /* 14556e81090bSOded Gabbay * If next free chunk is not contiguous than we need to 14566e81090bSOded Gabbay * restart our search from the last free chunk we found (which 14576e81090bSOded Gabbay * wasn't contiguous to the previous ones 14586e81090bSOded Gabbay */ 14596e81090bSOded Gabbay if ((*mem_obj)->range_end != found) { 14606e81090bSOded Gabbay start_search = found; 14616e81090bSOded Gabbay goto kfd_gtt_restart_search; 14626e81090bSOded Gabbay } 14636e81090bSOded Gabbay 14646e81090bSOded Gabbay /* 14656e81090bSOded Gabbay * If we reached end of buffer, bail out with error 14666e81090bSOded Gabbay */ 14676e81090bSOded Gabbay if (found == kfd->gtt_sa_num_of_chunks) 14686e81090bSOded Gabbay goto kfd_gtt_no_free_chunk; 14696e81090bSOded Gabbay 14706e81090bSOded Gabbay /* Check if we don't need another chunk */ 14716e81090bSOded Gabbay if (cur_size <= kfd->gtt_sa_chunk_size) 14726e81090bSOded Gabbay cur_size = 0; 14736e81090bSOded Gabbay else 14746e81090bSOded Gabbay cur_size -= kfd->gtt_sa_chunk_size; 14756e81090bSOded Gabbay 14766e81090bSOded Gabbay } while (cur_size > 0); 14776e81090bSOded Gabbay 147879775b62SKent Russell pr_debug("range_start = %d, range_end = %d\n", 14796e81090bSOded Gabbay (*mem_obj)->range_start, (*mem_obj)->range_end); 14806e81090bSOded Gabbay 14816e81090bSOded Gabbay /* Mark the chunks as allocated */ 14826e81090bSOded Gabbay for (found = (*mem_obj)->range_start; 14836e81090bSOded Gabbay found <= (*mem_obj)->range_end; 14846e81090bSOded Gabbay found++) 14856e81090bSOded Gabbay set_bit(found, kfd->gtt_sa_bitmap); 14866e81090bSOded Gabbay 14876e81090bSOded Gabbay kfd_gtt_out: 14886e81090bSOded Gabbay mutex_unlock(&kfd->gtt_sa_lock); 14896e81090bSOded Gabbay return 0; 14906e81090bSOded Gabbay 14916e81090bSOded Gabbay kfd_gtt_no_free_chunk: 14923148a6a0SJack Zhang pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj); 14936e81090bSOded Gabbay mutex_unlock(&kfd->gtt_sa_lock); 14943148a6a0SJack Zhang kfree(*mem_obj); 14956e81090bSOded Gabbay return -ENOMEM; 14966e81090bSOded Gabbay } 14976e81090bSOded Gabbay 14986e81090bSOded Gabbay int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) 14996e81090bSOded Gabbay { 15006e81090bSOded Gabbay unsigned int bit; 15016e81090bSOded Gabbay 15029216ed29SOded Gabbay /* Act like kfree when trying to free a NULL object */ 15039216ed29SOded Gabbay if (!mem_obj) 15049216ed29SOded Gabbay return 0; 15056e81090bSOded Gabbay 150679775b62SKent Russell pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n", 15076e81090bSOded Gabbay mem_obj, mem_obj->range_start, mem_obj->range_end); 15086e81090bSOded Gabbay 15096e81090bSOded Gabbay mutex_lock(&kfd->gtt_sa_lock); 15106e81090bSOded Gabbay 15116e81090bSOded Gabbay /* Mark the chunks as free */ 15126e81090bSOded Gabbay for (bit = mem_obj->range_start; 15136e81090bSOded Gabbay bit <= mem_obj->range_end; 15146e81090bSOded Gabbay bit++) 15156e81090bSOded Gabbay clear_bit(bit, kfd->gtt_sa_bitmap); 15166e81090bSOded Gabbay 15176e81090bSOded Gabbay mutex_unlock(&kfd->gtt_sa_lock); 15186e81090bSOded Gabbay 15196e81090bSOded Gabbay kfree(mem_obj); 15206e81090bSOded Gabbay return 0; 15216e81090bSOded Gabbay } 1522a29ec470SShaoyun Liu 15239b54d201SEric Huang void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) 15249b54d201SEric Huang { 15259b54d201SEric Huang if (kfd) 15269b54d201SEric Huang atomic_inc(&kfd->sram_ecc_flag); 15279b54d201SEric Huang } 15289b54d201SEric Huang 152943d8107fSHarish Kasiviswanathan void kfd_inc_compute_active(struct kfd_dev *kfd) 153043d8107fSHarish Kasiviswanathan { 153143d8107fSHarish Kasiviswanathan if (atomic_inc_return(&kfd->compute_profile) == 1) 15326bfc7c7eSGraham Sider amdgpu_amdkfd_set_compute_idle(kfd->adev, false); 153343d8107fSHarish Kasiviswanathan } 153443d8107fSHarish Kasiviswanathan 153543d8107fSHarish Kasiviswanathan void kfd_dec_compute_active(struct kfd_dev *kfd) 153643d8107fSHarish Kasiviswanathan { 153743d8107fSHarish Kasiviswanathan int count = atomic_dec_return(&kfd->compute_profile); 153843d8107fSHarish Kasiviswanathan 153943d8107fSHarish Kasiviswanathan if (count == 0) 15406bfc7c7eSGraham Sider amdgpu_amdkfd_set_compute_idle(kfd->adev, true); 154143d8107fSHarish Kasiviswanathan WARN_ONCE(count < 0, "Compute profile ref. count error"); 154243d8107fSHarish Kasiviswanathan } 154343d8107fSHarish Kasiviswanathan 1544410e302eSGraham Sider void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) 15452c2b0d88SMukul Joshi { 1546158fc08dSAmber Lin if (kfd && kfd->init_complete) 15472c2b0d88SMukul Joshi kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask); 15482c2b0d88SMukul Joshi } 15492c2b0d88SMukul Joshi 1550a29ec470SShaoyun Liu #if defined(CONFIG_DEBUG_FS) 1551a29ec470SShaoyun Liu 1552a29ec470SShaoyun Liu /* This function will send a package to HIQ to hang the HWS 1553a29ec470SShaoyun Liu * which will trigger a GPU reset and bring the HWS back to normal state 1554a29ec470SShaoyun Liu */ 1555a29ec470SShaoyun Liu int kfd_debugfs_hang_hws(struct kfd_dev *dev) 1556a29ec470SShaoyun Liu { 1557a29ec470SShaoyun Liu if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) { 1558a29ec470SShaoyun Liu pr_err("HWS is not enabled"); 1559a29ec470SShaoyun Liu return -EINVAL; 1560a29ec470SShaoyun Liu } 1561a29ec470SShaoyun Liu 15624f942aaeSOak Zeng return dqm_debugfs_hang_hws(dev->dqm); 1563a29ec470SShaoyun Liu } 1564a29ec470SShaoyun Liu 1565a29ec470SShaoyun Liu #endif 1566