14a488a7aSOded Gabbay /* 24a488a7aSOded Gabbay * Copyright 2014 Advanced Micro Devices, Inc. 34a488a7aSOded Gabbay * 44a488a7aSOded Gabbay * Permission is hereby granted, free of charge, to any person obtaining a 54a488a7aSOded Gabbay * copy of this software and associated documentation files (the "Software"), 64a488a7aSOded Gabbay * to deal in the Software without restriction, including without limitation 74a488a7aSOded Gabbay * the rights to use, copy, modify, merge, publish, distribute, sublicense, 84a488a7aSOded Gabbay * and/or sell copies of the Software, and to permit persons to whom the 94a488a7aSOded Gabbay * Software is furnished to do so, subject to the following conditions: 104a488a7aSOded Gabbay * 114a488a7aSOded Gabbay * The above copyright notice and this permission notice shall be included in 124a488a7aSOded Gabbay * all copies or substantial portions of the Software. 134a488a7aSOded Gabbay * 144a488a7aSOded Gabbay * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 154a488a7aSOded Gabbay * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 164a488a7aSOded Gabbay * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 174a488a7aSOded Gabbay * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 184a488a7aSOded Gabbay * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 194a488a7aSOded Gabbay * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 204a488a7aSOded Gabbay * OTHER DEALINGS IN THE SOFTWARE. 214a488a7aSOded Gabbay */ 224a488a7aSOded Gabbay 234a488a7aSOded Gabbay #include <linux/bsearch.h> 244a488a7aSOded Gabbay #include <linux/pci.h> 254a488a7aSOded Gabbay #include <linux/slab.h> 264a488a7aSOded Gabbay #include "kfd_priv.h" 2764c7f8cfSBen Goz #include "kfd_device_queue_manager.h" 28507968ddSFelix Kuehling #include "kfd_pm4_headers_vi.h" 29fd6a440eSJonathan Kim #include "kfd_pm4_headers_aldebaran.h" 300db54b24SYong Zhao #include "cwsr_trap_handler.h" 3164d1c3a4SFelix Kuehling #include "kfd_iommu.h" 325b87245fSAmber Lin #include "amdgpu_amdkfd.h" 332c2b0d88SMukul Joshi #include "kfd_smi_events.h" 34814ab993SPhilip Yang #include "kfd_migrate.h" 355b983db8SAlex Deucher #include "amdgpu.h" 364a488a7aSOded Gabbay 3719f6d2a6SOded Gabbay #define MQD_SIZE_ALIGNED 768 38e42051d2SShaoyun Liu 39e42051d2SShaoyun Liu /* 40e42051d2SShaoyun Liu * kfd_locked is used to lock the kfd driver during suspend or reset 41e42051d2SShaoyun Liu * once locked, kfd driver will stop any further GPU execution. 42e42051d2SShaoyun Liu * create process (open) will return -EAGAIN. 43e42051d2SShaoyun Liu */ 44e42051d2SShaoyun Liu static atomic_t kfd_locked = ATOMIC_INIT(0); 4519f6d2a6SOded Gabbay 46a3e520a2SAlex Deucher #ifdef CONFIG_DRM_AMDGPU_CIK 47e392c887SYong Zhao extern const struct kfd2kgd_calls gfx_v7_kfd2kgd; 48a3e520a2SAlex Deucher #endif 49e392c887SYong Zhao extern const struct kfd2kgd_calls gfx_v8_kfd2kgd; 50e392c887SYong Zhao extern const struct kfd2kgd_calls gfx_v9_kfd2kgd; 51e392c887SYong Zhao extern const struct kfd2kgd_calls arcturus_kfd2kgd; 525073506cSJonathan Kim extern const struct kfd2kgd_calls aldebaran_kfd2kgd; 53e392c887SYong Zhao extern const struct kfd2kgd_calls gfx_v10_kfd2kgd; 543a2f0c81SYong Zhao extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd; 55e392c887SYong Zhao 5664d1c3a4SFelix Kuehling #ifdef KFD_SUPPORT_IOMMU_V2 574a488a7aSOded Gabbay static const struct kfd_device_info kaveri_device_info = { 580da7558cSBen Goz .asic_family = CHIP_KAVERI, 59c181159aSYong Zhao .asic_name = "kaveri", 609d6fa9c7SGraham Sider .gfx_target_version = 70000, 610da7558cSBen Goz .max_pasid_bits = 16, 62992839adSYair Shachar /* max num of queues for KV.TODO should be a dynamic value */ 63992839adSYair Shachar .max_no_of_hqd = 24, 64ada2b29cSFelix Kuehling .doorbell_size = 4, 650da7558cSBen Goz .ih_ring_entry_size = 4 * sizeof(uint32_t), 66f3a39818SAndrew Lewycky .event_interrupt_class = &event_interrupt_class_cik, 67fbeb661bSYair Shachar .num_of_watch_points = 4, 68373d7080SFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 69373d7080SFelix Kuehling .supports_cwsr = false, 7064d1c3a4SFelix Kuehling .needs_iommu_device = true, 713ee2d00cSFelix Kuehling .needs_pci_atomics = false, 7298bb9222SYong Zhao .num_sdma_engines = 2, 731b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 74d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 750da7558cSBen Goz }; 760da7558cSBen Goz 770da7558cSBen Goz static const struct kfd_device_info carrizo_device_info = { 780da7558cSBen Goz .asic_family = CHIP_CARRIZO, 79c181159aSYong Zhao .asic_name = "carrizo", 809d6fa9c7SGraham Sider .gfx_target_version = 80001, 814a488a7aSOded Gabbay .max_pasid_bits = 16, 82eaccd6e7SOded Gabbay /* max num of queues for CZ.TODO should be a dynamic value */ 83eaccd6e7SOded Gabbay .max_no_of_hqd = 24, 84ada2b29cSFelix Kuehling .doorbell_size = 4, 85b3f5e6b4SAndrew Lewycky .ih_ring_entry_size = 4 * sizeof(uint32_t), 86eaccd6e7SOded Gabbay .event_interrupt_class = &event_interrupt_class_cik, 87f7c826adSAlexey Skidanov .num_of_watch_points = 4, 88373d7080SFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 89373d7080SFelix Kuehling .supports_cwsr = true, 9064d1c3a4SFelix Kuehling .needs_iommu_device = true, 913ee2d00cSFelix Kuehling .needs_pci_atomics = false, 9298bb9222SYong Zhao .num_sdma_engines = 2, 931b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 94d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 954a488a7aSOded Gabbay }; 966127896fSHuang Rui #endif 974d663df6SYong Zhao 984d663df6SYong Zhao static const struct kfd_device_info raven_device_info = { 994d663df6SYong Zhao .asic_family = CHIP_RAVEN, 100c181159aSYong Zhao .asic_name = "raven", 1019d6fa9c7SGraham Sider .gfx_target_version = 90002, 1024d663df6SYong Zhao .max_pasid_bits = 16, 1034d663df6SYong Zhao .max_no_of_hqd = 24, 1044d663df6SYong Zhao .doorbell_size = 8, 1054d663df6SYong Zhao .ih_ring_entry_size = 8 * sizeof(uint32_t), 1064d663df6SYong Zhao .event_interrupt_class = &event_interrupt_class_v9, 1074d663df6SYong Zhao .num_of_watch_points = 4, 1084d663df6SYong Zhao .mqd_size_aligned = MQD_SIZE_ALIGNED, 1094d663df6SYong Zhao .supports_cwsr = true, 1104d663df6SYong Zhao .needs_iommu_device = true, 1114d663df6SYong Zhao .needs_pci_atomics = true, 1124d663df6SYong Zhao .num_sdma_engines = 1, 1131b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 114d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 1154d663df6SYong Zhao }; 1164a488a7aSOded Gabbay 117a3084e6cSFelix Kuehling static const struct kfd_device_info hawaii_device_info = { 118a3084e6cSFelix Kuehling .asic_family = CHIP_HAWAII, 119c181159aSYong Zhao .asic_name = "hawaii", 1209d6fa9c7SGraham Sider .gfx_target_version = 70001, 121a3084e6cSFelix Kuehling .max_pasid_bits = 16, 122a3084e6cSFelix Kuehling /* max num of queues for KV.TODO should be a dynamic value */ 123a3084e6cSFelix Kuehling .max_no_of_hqd = 24, 124ada2b29cSFelix Kuehling .doorbell_size = 4, 125a3084e6cSFelix Kuehling .ih_ring_entry_size = 4 * sizeof(uint32_t), 126a3084e6cSFelix Kuehling .event_interrupt_class = &event_interrupt_class_cik, 127a3084e6cSFelix Kuehling .num_of_watch_points = 4, 128a3084e6cSFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 129a3084e6cSFelix Kuehling .supports_cwsr = false, 13064d1c3a4SFelix Kuehling .needs_iommu_device = false, 131a3084e6cSFelix Kuehling .needs_pci_atomics = false, 13298bb9222SYong Zhao .num_sdma_engines = 2, 1331b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 134d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 135a3084e6cSFelix Kuehling }; 136a3084e6cSFelix Kuehling 137a3084e6cSFelix Kuehling static const struct kfd_device_info tonga_device_info = { 138a3084e6cSFelix Kuehling .asic_family = CHIP_TONGA, 139c181159aSYong Zhao .asic_name = "tonga", 1409d6fa9c7SGraham Sider .gfx_target_version = 80002, 141a3084e6cSFelix Kuehling .max_pasid_bits = 16, 142a3084e6cSFelix Kuehling .max_no_of_hqd = 24, 143ada2b29cSFelix Kuehling .doorbell_size = 4, 144a3084e6cSFelix Kuehling .ih_ring_entry_size = 4 * sizeof(uint32_t), 145a3084e6cSFelix Kuehling .event_interrupt_class = &event_interrupt_class_cik, 146a3084e6cSFelix Kuehling .num_of_watch_points = 4, 147a3084e6cSFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 148a3084e6cSFelix Kuehling .supports_cwsr = false, 14964d1c3a4SFelix Kuehling .needs_iommu_device = false, 150a3084e6cSFelix Kuehling .needs_pci_atomics = true, 15198bb9222SYong Zhao .num_sdma_engines = 2, 1521b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 153d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 154a3084e6cSFelix Kuehling }; 155a3084e6cSFelix Kuehling 156a3084e6cSFelix Kuehling static const struct kfd_device_info fiji_device_info = { 157a3084e6cSFelix Kuehling .asic_family = CHIP_FIJI, 158c181159aSYong Zhao .asic_name = "fiji", 1599d6fa9c7SGraham Sider .gfx_target_version = 80003, 160a3084e6cSFelix Kuehling .max_pasid_bits = 16, 161a3084e6cSFelix Kuehling .max_no_of_hqd = 24, 162ada2b29cSFelix Kuehling .doorbell_size = 4, 163a3084e6cSFelix Kuehling .ih_ring_entry_size = 4 * sizeof(uint32_t), 164a3084e6cSFelix Kuehling .event_interrupt_class = &event_interrupt_class_cik, 165a3084e6cSFelix Kuehling .num_of_watch_points = 4, 166a3084e6cSFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 167a3084e6cSFelix Kuehling .supports_cwsr = true, 16864d1c3a4SFelix Kuehling .needs_iommu_device = false, 169a3084e6cSFelix Kuehling .needs_pci_atomics = true, 17098bb9222SYong Zhao .num_sdma_engines = 2, 1711b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 172d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 173a3084e6cSFelix Kuehling }; 174a3084e6cSFelix Kuehling 175a3084e6cSFelix Kuehling static const struct kfd_device_info fiji_vf_device_info = { 176a3084e6cSFelix Kuehling .asic_family = CHIP_FIJI, 177c181159aSYong Zhao .asic_name = "fiji", 1789d6fa9c7SGraham Sider .gfx_target_version = 80003, 179a3084e6cSFelix Kuehling .max_pasid_bits = 16, 180a3084e6cSFelix Kuehling .max_no_of_hqd = 24, 181ada2b29cSFelix Kuehling .doorbell_size = 4, 182a3084e6cSFelix Kuehling .ih_ring_entry_size = 4 * sizeof(uint32_t), 183a3084e6cSFelix Kuehling .event_interrupt_class = &event_interrupt_class_cik, 184a3084e6cSFelix Kuehling .num_of_watch_points = 4, 185a3084e6cSFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 186a3084e6cSFelix Kuehling .supports_cwsr = true, 18764d1c3a4SFelix Kuehling .needs_iommu_device = false, 188a3084e6cSFelix Kuehling .needs_pci_atomics = false, 18998bb9222SYong Zhao .num_sdma_engines = 2, 1901b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 191d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 192a3084e6cSFelix Kuehling }; 193a3084e6cSFelix Kuehling 194a3084e6cSFelix Kuehling 195a3084e6cSFelix Kuehling static const struct kfd_device_info polaris10_device_info = { 196a3084e6cSFelix Kuehling .asic_family = CHIP_POLARIS10, 197c181159aSYong Zhao .asic_name = "polaris10", 1989d6fa9c7SGraham Sider .gfx_target_version = 80003, 199a3084e6cSFelix Kuehling .max_pasid_bits = 16, 200a3084e6cSFelix Kuehling .max_no_of_hqd = 24, 201ada2b29cSFelix Kuehling .doorbell_size = 4, 202a3084e6cSFelix Kuehling .ih_ring_entry_size = 4 * sizeof(uint32_t), 203a3084e6cSFelix Kuehling .event_interrupt_class = &event_interrupt_class_cik, 204a3084e6cSFelix Kuehling .num_of_watch_points = 4, 205a3084e6cSFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 206a3084e6cSFelix Kuehling .supports_cwsr = true, 20764d1c3a4SFelix Kuehling .needs_iommu_device = false, 208a3084e6cSFelix Kuehling .needs_pci_atomics = true, 20998bb9222SYong Zhao .num_sdma_engines = 2, 2101b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 211d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 212a3084e6cSFelix Kuehling }; 213a3084e6cSFelix Kuehling 214a3084e6cSFelix Kuehling static const struct kfd_device_info polaris10_vf_device_info = { 215a3084e6cSFelix Kuehling .asic_family = CHIP_POLARIS10, 216c181159aSYong Zhao .asic_name = "polaris10", 2179d6fa9c7SGraham Sider .gfx_target_version = 80003, 218a3084e6cSFelix Kuehling .max_pasid_bits = 16, 219a3084e6cSFelix Kuehling .max_no_of_hqd = 24, 220ada2b29cSFelix Kuehling .doorbell_size = 4, 221a3084e6cSFelix Kuehling .ih_ring_entry_size = 4 * sizeof(uint32_t), 222a3084e6cSFelix Kuehling .event_interrupt_class = &event_interrupt_class_cik, 223a3084e6cSFelix Kuehling .num_of_watch_points = 4, 224a3084e6cSFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 225a3084e6cSFelix Kuehling .supports_cwsr = true, 22664d1c3a4SFelix Kuehling .needs_iommu_device = false, 227a3084e6cSFelix Kuehling .needs_pci_atomics = false, 22898bb9222SYong Zhao .num_sdma_engines = 2, 2291b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 230d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 231a3084e6cSFelix Kuehling }; 232a3084e6cSFelix Kuehling 233a3084e6cSFelix Kuehling static const struct kfd_device_info polaris11_device_info = { 234a3084e6cSFelix Kuehling .asic_family = CHIP_POLARIS11, 235c181159aSYong Zhao .asic_name = "polaris11", 2369d6fa9c7SGraham Sider .gfx_target_version = 80003, 237a3084e6cSFelix Kuehling .max_pasid_bits = 16, 238a3084e6cSFelix Kuehling .max_no_of_hqd = 24, 239ada2b29cSFelix Kuehling .doorbell_size = 4, 240a3084e6cSFelix Kuehling .ih_ring_entry_size = 4 * sizeof(uint32_t), 241a3084e6cSFelix Kuehling .event_interrupt_class = &event_interrupt_class_cik, 242a3084e6cSFelix Kuehling .num_of_watch_points = 4, 243a3084e6cSFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 244a3084e6cSFelix Kuehling .supports_cwsr = true, 24564d1c3a4SFelix Kuehling .needs_iommu_device = false, 246a3084e6cSFelix Kuehling .needs_pci_atomics = true, 24798bb9222SYong Zhao .num_sdma_engines = 2, 2481b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 249d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 250a3084e6cSFelix Kuehling }; 251a3084e6cSFelix Kuehling 252846a44d7SGang Ba static const struct kfd_device_info polaris12_device_info = { 253846a44d7SGang Ba .asic_family = CHIP_POLARIS12, 254c181159aSYong Zhao .asic_name = "polaris12", 2559d6fa9c7SGraham Sider .gfx_target_version = 80003, 256846a44d7SGang Ba .max_pasid_bits = 16, 257846a44d7SGang Ba .max_no_of_hqd = 24, 258846a44d7SGang Ba .doorbell_size = 4, 259846a44d7SGang Ba .ih_ring_entry_size = 4 * sizeof(uint32_t), 260846a44d7SGang Ba .event_interrupt_class = &event_interrupt_class_cik, 261846a44d7SGang Ba .num_of_watch_points = 4, 262846a44d7SGang Ba .mqd_size_aligned = MQD_SIZE_ALIGNED, 263846a44d7SGang Ba .supports_cwsr = true, 264846a44d7SGang Ba .needs_iommu_device = false, 265846a44d7SGang Ba .needs_pci_atomics = true, 266846a44d7SGang Ba .num_sdma_engines = 2, 2671b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 268846a44d7SGang Ba .num_sdma_queues_per_engine = 2, 269846a44d7SGang Ba }; 270846a44d7SGang Ba 271ed81cd6eSKent Russell static const struct kfd_device_info vegam_device_info = { 272ed81cd6eSKent Russell .asic_family = CHIP_VEGAM, 273c181159aSYong Zhao .asic_name = "vegam", 2749d6fa9c7SGraham Sider .gfx_target_version = 80003, 275ed81cd6eSKent Russell .max_pasid_bits = 16, 276ed81cd6eSKent Russell .max_no_of_hqd = 24, 277ed81cd6eSKent Russell .doorbell_size = 4, 278ed81cd6eSKent Russell .ih_ring_entry_size = 4 * sizeof(uint32_t), 279ed81cd6eSKent Russell .event_interrupt_class = &event_interrupt_class_cik, 280ed81cd6eSKent Russell .num_of_watch_points = 4, 281ed81cd6eSKent Russell .mqd_size_aligned = MQD_SIZE_ALIGNED, 282ed81cd6eSKent Russell .supports_cwsr = true, 283ed81cd6eSKent Russell .needs_iommu_device = false, 284ed81cd6eSKent Russell .needs_pci_atomics = true, 285ed81cd6eSKent Russell .num_sdma_engines = 2, 286ed81cd6eSKent Russell .num_xgmi_sdma_engines = 0, 287a3084e6cSFelix Kuehling .num_sdma_queues_per_engine = 2, 288a3084e6cSFelix Kuehling }; 289a3084e6cSFelix Kuehling 290389056e5SFelix Kuehling static const struct kfd_device_info vega10_device_info = { 291389056e5SFelix Kuehling .asic_family = CHIP_VEGA10, 292c181159aSYong Zhao .asic_name = "vega10", 2939d6fa9c7SGraham Sider .gfx_target_version = 90000, 294389056e5SFelix Kuehling .max_pasid_bits = 16, 295389056e5SFelix Kuehling .max_no_of_hqd = 24, 296389056e5SFelix Kuehling .doorbell_size = 8, 297389056e5SFelix Kuehling .ih_ring_entry_size = 8 * sizeof(uint32_t), 298389056e5SFelix Kuehling .event_interrupt_class = &event_interrupt_class_v9, 299389056e5SFelix Kuehling .num_of_watch_points = 4, 300389056e5SFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 301389056e5SFelix Kuehling .supports_cwsr = true, 302389056e5SFelix Kuehling .needs_iommu_device = false, 303389056e5SFelix Kuehling .needs_pci_atomics = false, 30498bb9222SYong Zhao .num_sdma_engines = 2, 3051b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 306d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 307389056e5SFelix Kuehling }; 308389056e5SFelix Kuehling 309389056e5SFelix Kuehling static const struct kfd_device_info vega10_vf_device_info = { 310389056e5SFelix Kuehling .asic_family = CHIP_VEGA10, 311c181159aSYong Zhao .asic_name = "vega10", 3129d6fa9c7SGraham Sider .gfx_target_version = 90000, 313389056e5SFelix Kuehling .max_pasid_bits = 16, 314389056e5SFelix Kuehling .max_no_of_hqd = 24, 315389056e5SFelix Kuehling .doorbell_size = 8, 316389056e5SFelix Kuehling .ih_ring_entry_size = 8 * sizeof(uint32_t), 317389056e5SFelix Kuehling .event_interrupt_class = &event_interrupt_class_v9, 318389056e5SFelix Kuehling .num_of_watch_points = 4, 319389056e5SFelix Kuehling .mqd_size_aligned = MQD_SIZE_ALIGNED, 320389056e5SFelix Kuehling .supports_cwsr = true, 321389056e5SFelix Kuehling .needs_iommu_device = false, 322389056e5SFelix Kuehling .needs_pci_atomics = false, 32398bb9222SYong Zhao .num_sdma_engines = 2, 3241b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 325d5094189SShaoyun Liu .num_sdma_queues_per_engine = 2, 326389056e5SFelix Kuehling }; 327389056e5SFelix Kuehling 328846a44d7SGang Ba static const struct kfd_device_info vega12_device_info = { 329846a44d7SGang Ba .asic_family = CHIP_VEGA12, 330c181159aSYong Zhao .asic_name = "vega12", 3319d6fa9c7SGraham Sider .gfx_target_version = 90004, 332846a44d7SGang Ba .max_pasid_bits = 16, 333846a44d7SGang Ba .max_no_of_hqd = 24, 334846a44d7SGang Ba .doorbell_size = 8, 335846a44d7SGang Ba .ih_ring_entry_size = 8 * sizeof(uint32_t), 336846a44d7SGang Ba .event_interrupt_class = &event_interrupt_class_v9, 337846a44d7SGang Ba .num_of_watch_points = 4, 338846a44d7SGang Ba .mqd_size_aligned = MQD_SIZE_ALIGNED, 339846a44d7SGang Ba .supports_cwsr = true, 340846a44d7SGang Ba .needs_iommu_device = false, 341846a44d7SGang Ba .needs_pci_atomics = false, 342846a44d7SGang Ba .num_sdma_engines = 2, 3431b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 344846a44d7SGang Ba .num_sdma_queues_per_engine = 2, 345846a44d7SGang Ba }; 346846a44d7SGang Ba 34722a3a294SShaoyun Liu static const struct kfd_device_info vega20_device_info = { 34822a3a294SShaoyun Liu .asic_family = CHIP_VEGA20, 349c181159aSYong Zhao .asic_name = "vega20", 3509d6fa9c7SGraham Sider .gfx_target_version = 90006, 35122a3a294SShaoyun Liu .max_pasid_bits = 16, 35222a3a294SShaoyun Liu .max_no_of_hqd = 24, 35322a3a294SShaoyun Liu .doorbell_size = 8, 35422a3a294SShaoyun Liu .ih_ring_entry_size = 8 * sizeof(uint32_t), 35522a3a294SShaoyun Liu .event_interrupt_class = &event_interrupt_class_v9, 35622a3a294SShaoyun Liu .num_of_watch_points = 4, 35722a3a294SShaoyun Liu .mqd_size_aligned = MQD_SIZE_ALIGNED, 35822a3a294SShaoyun Liu .supports_cwsr = true, 35922a3a294SShaoyun Liu .needs_iommu_device = false, 360006a0b3dSShaoyun Liu .needs_pci_atomics = false, 36122a3a294SShaoyun Liu .num_sdma_engines = 2, 3621b4670f6SOak Zeng .num_xgmi_sdma_engines = 0, 36322a3a294SShaoyun Liu .num_sdma_queues_per_engine = 8, 36422a3a294SShaoyun Liu }; 36522a3a294SShaoyun Liu 36649adcf8aSYong Zhao static const struct kfd_device_info arcturus_device_info = { 36749adcf8aSYong Zhao .asic_family = CHIP_ARCTURUS, 368c181159aSYong Zhao .asic_name = "arcturus", 3699d6fa9c7SGraham Sider .gfx_target_version = 90008, 37049adcf8aSYong Zhao .max_pasid_bits = 16, 37149adcf8aSYong Zhao .max_no_of_hqd = 24, 37249adcf8aSYong Zhao .doorbell_size = 8, 37349adcf8aSYong Zhao .ih_ring_entry_size = 8 * sizeof(uint32_t), 37449adcf8aSYong Zhao .event_interrupt_class = &event_interrupt_class_v9, 37549adcf8aSYong Zhao .num_of_watch_points = 4, 37649adcf8aSYong Zhao .mqd_size_aligned = MQD_SIZE_ALIGNED, 37749adcf8aSYong Zhao .supports_cwsr = true, 37849adcf8aSYong Zhao .needs_iommu_device = false, 37949adcf8aSYong Zhao .needs_pci_atomics = false, 380b6689cf7SOak Zeng .num_sdma_engines = 2, 381b6689cf7SOak Zeng .num_xgmi_sdma_engines = 6, 38249adcf8aSYong Zhao .num_sdma_queues_per_engine = 8, 38349adcf8aSYong Zhao }; 38449adcf8aSYong Zhao 38536e22d59SYong Zhao static const struct kfd_device_info aldebaran_device_info = { 38636e22d59SYong Zhao .asic_family = CHIP_ALDEBARAN, 38736e22d59SYong Zhao .asic_name = "aldebaran", 3889d6fa9c7SGraham Sider .gfx_target_version = 90010, 38936e22d59SYong Zhao .max_pasid_bits = 16, 39036e22d59SYong Zhao .max_no_of_hqd = 24, 39136e22d59SYong Zhao .doorbell_size = 8, 39236e22d59SYong Zhao .ih_ring_entry_size = 8 * sizeof(uint32_t), 39336e22d59SYong Zhao .event_interrupt_class = &event_interrupt_class_v9, 39436e22d59SYong Zhao .num_of_watch_points = 4, 39536e22d59SYong Zhao .mqd_size_aligned = MQD_SIZE_ALIGNED, 39636e22d59SYong Zhao .supports_cwsr = true, 39736e22d59SYong Zhao .needs_iommu_device = false, 39836e22d59SYong Zhao .needs_pci_atomics = false, 39936e22d59SYong Zhao .num_sdma_engines = 2, 40036e22d59SYong Zhao .num_xgmi_sdma_engines = 3, 40136e22d59SYong Zhao .num_sdma_queues_per_engine = 8, 40236e22d59SYong Zhao }; 40336e22d59SYong Zhao 4042b9c2211SHuang Rui static const struct kfd_device_info renoir_device_info = { 4052b9c2211SHuang Rui .asic_family = CHIP_RENOIR, 406acb9acbeSHuang Rui .asic_name = "renoir", 4079d6fa9c7SGraham Sider .gfx_target_version = 90002, 4082b9c2211SHuang Rui .max_pasid_bits = 16, 4092b9c2211SHuang Rui .max_no_of_hqd = 24, 4102b9c2211SHuang Rui .doorbell_size = 8, 4112b9c2211SHuang Rui .ih_ring_entry_size = 8 * sizeof(uint32_t), 4122b9c2211SHuang Rui .event_interrupt_class = &event_interrupt_class_v9, 4132b9c2211SHuang Rui .num_of_watch_points = 4, 4142b9c2211SHuang Rui .mqd_size_aligned = MQD_SIZE_ALIGNED, 4152b9c2211SHuang Rui .supports_cwsr = true, 4162b9c2211SHuang Rui .needs_iommu_device = false, 4172b9c2211SHuang Rui .needs_pci_atomics = false, 4182b9c2211SHuang Rui .num_sdma_engines = 1, 4192b9c2211SHuang Rui .num_xgmi_sdma_engines = 0, 4202b9c2211SHuang Rui .num_sdma_queues_per_engine = 2, 4212b9c2211SHuang Rui }; 4222b9c2211SHuang Rui 42314328aa5SPhilip Cox static const struct kfd_device_info navi10_device_info = { 42414328aa5SPhilip Cox .asic_family = CHIP_NAVI10, 425c181159aSYong Zhao .asic_name = "navi10", 4269d6fa9c7SGraham Sider .gfx_target_version = 100100, 42714328aa5SPhilip Cox .max_pasid_bits = 16, 42814328aa5SPhilip Cox .max_no_of_hqd = 24, 42914328aa5SPhilip Cox .doorbell_size = 8, 43014328aa5SPhilip Cox .ih_ring_entry_size = 8 * sizeof(uint32_t), 43114328aa5SPhilip Cox .event_interrupt_class = &event_interrupt_class_v9, 43214328aa5SPhilip Cox .num_of_watch_points = 4, 43314328aa5SPhilip Cox .mqd_size_aligned = MQD_SIZE_ALIGNED, 43414328aa5SPhilip Cox .needs_iommu_device = false, 43514328aa5SPhilip Cox .supports_cwsr = true, 4366cc980e3SHarish Kasiviswanathan .needs_pci_atomics = true, 437e312af6cSFelix Kuehling .no_atomic_fw_version = 145, 43814328aa5SPhilip Cox .num_sdma_engines = 2, 43914328aa5SPhilip Cox .num_xgmi_sdma_engines = 0, 44014328aa5SPhilip Cox .num_sdma_queues_per_engine = 8, 44114328aa5SPhilip Cox }; 44214328aa5SPhilip Cox 443b77fb9d8Sshaoyunl static const struct kfd_device_info navi12_device_info = { 4440e94b564Sshaoyunl .asic_family = CHIP_NAVI12, 445b77fb9d8Sshaoyunl .asic_name = "navi12", 4469d6fa9c7SGraham Sider .gfx_target_version = 100101, 447b77fb9d8Sshaoyunl .max_pasid_bits = 16, 448b77fb9d8Sshaoyunl .max_no_of_hqd = 24, 449b77fb9d8Sshaoyunl .doorbell_size = 8, 450b77fb9d8Sshaoyunl .ih_ring_entry_size = 8 * sizeof(uint32_t), 451b77fb9d8Sshaoyunl .event_interrupt_class = &event_interrupt_class_v9, 452b77fb9d8Sshaoyunl .num_of_watch_points = 4, 453b77fb9d8Sshaoyunl .mqd_size_aligned = MQD_SIZE_ALIGNED, 454b77fb9d8Sshaoyunl .needs_iommu_device = false, 455b77fb9d8Sshaoyunl .supports_cwsr = true, 4566cc980e3SHarish Kasiviswanathan .needs_pci_atomics = true, 457e312af6cSFelix Kuehling .no_atomic_fw_version = 145, 458b77fb9d8Sshaoyunl .num_sdma_engines = 2, 459b77fb9d8Sshaoyunl .num_xgmi_sdma_engines = 0, 460b77fb9d8Sshaoyunl .num_sdma_queues_per_engine = 8, 461b77fb9d8Sshaoyunl }; 462b77fb9d8Sshaoyunl 4638099ae40SYong Zhao static const struct kfd_device_info navi14_device_info = { 4648099ae40SYong Zhao .asic_family = CHIP_NAVI14, 4658099ae40SYong Zhao .asic_name = "navi14", 4669d6fa9c7SGraham Sider .gfx_target_version = 100102, 4678099ae40SYong Zhao .max_pasid_bits = 16, 4688099ae40SYong Zhao .max_no_of_hqd = 24, 4698099ae40SYong Zhao .doorbell_size = 8, 4708099ae40SYong Zhao .ih_ring_entry_size = 8 * sizeof(uint32_t), 4718099ae40SYong Zhao .event_interrupt_class = &event_interrupt_class_v9, 4728099ae40SYong Zhao .num_of_watch_points = 4, 4738099ae40SYong Zhao .mqd_size_aligned = MQD_SIZE_ALIGNED, 4748099ae40SYong Zhao .needs_iommu_device = false, 4758099ae40SYong Zhao .supports_cwsr = true, 4766cc980e3SHarish Kasiviswanathan .needs_pci_atomics = true, 477e312af6cSFelix Kuehling .no_atomic_fw_version = 145, 4788099ae40SYong Zhao .num_sdma_engines = 2, 4798099ae40SYong Zhao .num_xgmi_sdma_engines = 0, 4808099ae40SYong Zhao .num_sdma_queues_per_engine = 8, 4818099ae40SYong Zhao }; 4828099ae40SYong Zhao 4833a2f0c81SYong Zhao static const struct kfd_device_info sienna_cichlid_device_info = { 4843a2f0c81SYong Zhao .asic_family = CHIP_SIENNA_CICHLID, 4853a2f0c81SYong Zhao .asic_name = "sienna_cichlid", 4869d6fa9c7SGraham Sider .gfx_target_version = 100300, 4873a2f0c81SYong Zhao .max_pasid_bits = 16, 4883a2f0c81SYong Zhao .max_no_of_hqd = 24, 4893a2f0c81SYong Zhao .doorbell_size = 8, 4903a2f0c81SYong Zhao .ih_ring_entry_size = 8 * sizeof(uint32_t), 4913a2f0c81SYong Zhao .event_interrupt_class = &event_interrupt_class_v9, 4923a2f0c81SYong Zhao .num_of_watch_points = 4, 4933a2f0c81SYong Zhao .mqd_size_aligned = MQD_SIZE_ALIGNED, 4943a2f0c81SYong Zhao .needs_iommu_device = false, 4953a2f0c81SYong Zhao .supports_cwsr = true, 4966cc980e3SHarish Kasiviswanathan .needs_pci_atomics = true, 497e312af6cSFelix Kuehling .no_atomic_fw_version = 92, 4983a2f0c81SYong Zhao .num_sdma_engines = 4, 4993a2f0c81SYong Zhao .num_xgmi_sdma_engines = 0, 5003a2f0c81SYong Zhao .num_sdma_queues_per_engine = 8, 5013a2f0c81SYong Zhao }; 5023a2f0c81SYong Zhao 503de89b2e4SChengming Gui static const struct kfd_device_info navy_flounder_device_info = { 504de89b2e4SChengming Gui .asic_family = CHIP_NAVY_FLOUNDER, 505de89b2e4SChengming Gui .asic_name = "navy_flounder", 5069d6fa9c7SGraham Sider .gfx_target_version = 100301, 507de89b2e4SChengming Gui .max_pasid_bits = 16, 508de89b2e4SChengming Gui .max_no_of_hqd = 24, 509de89b2e4SChengming Gui .doorbell_size = 8, 510de89b2e4SChengming Gui .ih_ring_entry_size = 8 * sizeof(uint32_t), 511de89b2e4SChengming Gui .event_interrupt_class = &event_interrupt_class_v9, 512de89b2e4SChengming Gui .num_of_watch_points = 4, 513de89b2e4SChengming Gui .mqd_size_aligned = MQD_SIZE_ALIGNED, 514de89b2e4SChengming Gui .needs_iommu_device = false, 515de89b2e4SChengming Gui .supports_cwsr = true, 5166cc980e3SHarish Kasiviswanathan .needs_pci_atomics = true, 517e312af6cSFelix Kuehling .no_atomic_fw_version = 92, 518de89b2e4SChengming Gui .num_sdma_engines = 2, 519de89b2e4SChengming Gui .num_xgmi_sdma_engines = 0, 520de89b2e4SChengming Gui .num_sdma_queues_per_engine = 8, 521de89b2e4SChengming Gui }; 522de89b2e4SChengming Gui 5233a5e715dSHuang Rui static const struct kfd_device_info vangogh_device_info = { 5243a5e715dSHuang Rui .asic_family = CHIP_VANGOGH, 5253a5e715dSHuang Rui .asic_name = "vangogh", 5269d6fa9c7SGraham Sider .gfx_target_version = 100303, 5273a5e715dSHuang Rui .max_pasid_bits = 16, 5283a5e715dSHuang Rui .max_no_of_hqd = 24, 5293a5e715dSHuang Rui .doorbell_size = 8, 5303a5e715dSHuang Rui .ih_ring_entry_size = 8 * sizeof(uint32_t), 5313a5e715dSHuang Rui .event_interrupt_class = &event_interrupt_class_v9, 5323a5e715dSHuang Rui .num_of_watch_points = 4, 5333a5e715dSHuang Rui .mqd_size_aligned = MQD_SIZE_ALIGNED, 5343a5e715dSHuang Rui .needs_iommu_device = false, 5353a5e715dSHuang Rui .supports_cwsr = true, 536e312af6cSFelix Kuehling .needs_pci_atomics = true, 537e312af6cSFelix Kuehling .no_atomic_fw_version = 92, 5383a5e715dSHuang Rui .num_sdma_engines = 1, 5393a5e715dSHuang Rui .num_xgmi_sdma_engines = 0, 5403a5e715dSHuang Rui .num_sdma_queues_per_engine = 2, 5413a5e715dSHuang Rui }; 5423a5e715dSHuang Rui 543eb5a34d4SChengming Gui static const struct kfd_device_info dimgrey_cavefish_device_info = { 544eb5a34d4SChengming Gui .asic_family = CHIP_DIMGREY_CAVEFISH, 545eb5a34d4SChengming Gui .asic_name = "dimgrey_cavefish", 5469d6fa9c7SGraham Sider .gfx_target_version = 100302, 547eb5a34d4SChengming Gui .max_pasid_bits = 16, 548eb5a34d4SChengming Gui .max_no_of_hqd = 24, 549eb5a34d4SChengming Gui .doorbell_size = 8, 550eb5a34d4SChengming Gui .ih_ring_entry_size = 8 * sizeof(uint32_t), 551eb5a34d4SChengming Gui .event_interrupt_class = &event_interrupt_class_v9, 552eb5a34d4SChengming Gui .num_of_watch_points = 4, 553eb5a34d4SChengming Gui .mqd_size_aligned = MQD_SIZE_ALIGNED, 554eb5a34d4SChengming Gui .needs_iommu_device = false, 555eb5a34d4SChengming Gui .supports_cwsr = true, 5566cc980e3SHarish Kasiviswanathan .needs_pci_atomics = true, 557e312af6cSFelix Kuehling .no_atomic_fw_version = 92, 558eb5a34d4SChengming Gui .num_sdma_engines = 2, 559eb5a34d4SChengming Gui .num_xgmi_sdma_engines = 0, 560eb5a34d4SChengming Gui .num_sdma_queues_per_engine = 8, 561eb5a34d4SChengming Gui }; 562eb5a34d4SChengming Gui 5635cf607ccSChengming Gui static const struct kfd_device_info beige_goby_device_info = { 5645cf607ccSChengming Gui .asic_family = CHIP_BEIGE_GOBY, 5655cf607ccSChengming Gui .asic_name = "beige_goby", 5669d6fa9c7SGraham Sider .gfx_target_version = 100304, 5675cf607ccSChengming Gui .max_pasid_bits = 16, 5685cf607ccSChengming Gui .max_no_of_hqd = 24, 5695cf607ccSChengming Gui .doorbell_size = 8, 5705cf607ccSChengming Gui .ih_ring_entry_size = 8 * sizeof(uint32_t), 5715cf607ccSChengming Gui .event_interrupt_class = &event_interrupt_class_v9, 5725cf607ccSChengming Gui .num_of_watch_points = 4, 5735cf607ccSChengming Gui .mqd_size_aligned = MQD_SIZE_ALIGNED, 5745cf607ccSChengming Gui .needs_iommu_device = false, 5755cf607ccSChengming Gui .supports_cwsr = true, 5765cf607ccSChengming Gui .needs_pci_atomics = true, 577e312af6cSFelix Kuehling .no_atomic_fw_version = 92, 5785cf607ccSChengming Gui .num_sdma_engines = 1, 5795cf607ccSChengming Gui .num_xgmi_sdma_engines = 0, 5805cf607ccSChengming Gui .num_sdma_queues_per_engine = 8, 5815cf607ccSChengming Gui }; 5825cf607ccSChengming Gui 583bf9d4e88SAaron Liu static const struct kfd_device_info yellow_carp_device_info = { 584bf9d4e88SAaron Liu .asic_family = CHIP_YELLOW_CARP, 585bf9d4e88SAaron Liu .asic_name = "yellow_carp", 5869d6fa9c7SGraham Sider .gfx_target_version = 100305, 587bf9d4e88SAaron Liu .max_pasid_bits = 16, 588bf9d4e88SAaron Liu .max_no_of_hqd = 24, 589bf9d4e88SAaron Liu .doorbell_size = 8, 590bf9d4e88SAaron Liu .ih_ring_entry_size = 8 * sizeof(uint32_t), 591bf9d4e88SAaron Liu .event_interrupt_class = &event_interrupt_class_v9, 592bf9d4e88SAaron Liu .num_of_watch_points = 4, 593bf9d4e88SAaron Liu .mqd_size_aligned = MQD_SIZE_ALIGNED, 594bf9d4e88SAaron Liu .needs_iommu_device = false, 595bf9d4e88SAaron Liu .supports_cwsr = true, 596e312af6cSFelix Kuehling .needs_pci_atomics = true, 597e312af6cSFelix Kuehling .no_atomic_fw_version = 92, 598bf9d4e88SAaron Liu .num_sdma_engines = 1, 599bf9d4e88SAaron Liu .num_xgmi_sdma_engines = 0, 600bf9d4e88SAaron Liu .num_sdma_queues_per_engine = 2, 601bf9d4e88SAaron Liu }; 602eb5a34d4SChengming Gui 60306e75b88STao Zhou static const struct kfd_device_info cyan_skillfish_device_info = { 60406e75b88STao Zhou .asic_family = CHIP_CYAN_SKILLFISH, 60506e75b88STao Zhou .asic_name = "cyan_skillfish", 6069d6fa9c7SGraham Sider .gfx_target_version = 100103, 60706e75b88STao Zhou .max_pasid_bits = 16, 60806e75b88STao Zhou .max_no_of_hqd = 24, 60906e75b88STao Zhou .doorbell_size = 8, 61006e75b88STao Zhou .ih_ring_entry_size = 8 * sizeof(uint32_t), 61106e75b88STao Zhou .event_interrupt_class = &event_interrupt_class_v9, 61206e75b88STao Zhou .num_of_watch_points = 4, 61306e75b88STao Zhou .mqd_size_aligned = MQD_SIZE_ALIGNED, 61406e75b88STao Zhou .needs_iommu_device = false, 61506e75b88STao Zhou .supports_cwsr = true, 61606e75b88STao Zhou .needs_pci_atomics = true, 61706e75b88STao Zhou .num_sdma_engines = 2, 61806e75b88STao Zhou .num_xgmi_sdma_engines = 0, 61906e75b88STao Zhou .num_sdma_queues_per_engine = 8, 62006e75b88STao Zhou }; 62106e75b88STao Zhou 6226e81090bSOded Gabbay static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, 6236e81090bSOded Gabbay unsigned int chunk_size); 6246e81090bSOded Gabbay static void kfd_gtt_sa_fini(struct kfd_dev *kfd); 6256e81090bSOded Gabbay 626b8935a7cSYong Zhao static int kfd_resume(struct kfd_dev *kfd); 627b8935a7cSYong Zhao 6285b983db8SAlex Deucher struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf) 6294a488a7aSOded Gabbay { 6304a488a7aSOded Gabbay struct kfd_dev *kfd; 631050091abSYong Zhao const struct kfd_device_info *device_info; 632e392c887SYong Zhao const struct kfd2kgd_calls *f2g; 6335b983db8SAlex Deucher struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 6345b983db8SAlex Deucher struct pci_dev *pdev = adev->pdev; 635050091abSYong Zhao 636*c868d584SAlex Deucher switch (adev->asic_type) { 637*c868d584SAlex Deucher #ifdef KFD_SUPPORT_IOMMU_V2 638*c868d584SAlex Deucher #ifdef CONFIG_DRM_AMDGPU_CIK 639*c868d584SAlex Deucher case CHIP_KAVERI: 640*c868d584SAlex Deucher if (vf) 641*c868d584SAlex Deucher device_info = NULL; 642*c868d584SAlex Deucher else 643*c868d584SAlex Deucher device_info = &kaveri_device_info; 644*c868d584SAlex Deucher f2g = &gfx_v7_kfd2kgd; 645*c868d584SAlex Deucher break; 646*c868d584SAlex Deucher #endif 647*c868d584SAlex Deucher case CHIP_CARRIZO: 648*c868d584SAlex Deucher if (vf) 649*c868d584SAlex Deucher device_info = NULL; 650*c868d584SAlex Deucher else 651*c868d584SAlex Deucher device_info = &carrizo_device_info; 652*c868d584SAlex Deucher f2g = &gfx_v8_kfd2kgd; 653*c868d584SAlex Deucher break; 654*c868d584SAlex Deucher #endif 655*c868d584SAlex Deucher #ifdef CONFIG_DRM_AMDGPU_CIK 656*c868d584SAlex Deucher case CHIP_HAWAII: 657*c868d584SAlex Deucher if (vf) 658*c868d584SAlex Deucher device_info = NULL; 659*c868d584SAlex Deucher else 660*c868d584SAlex Deucher device_info = &hawaii_device_info; 661*c868d584SAlex Deucher f2g = &gfx_v7_kfd2kgd; 662*c868d584SAlex Deucher break; 663*c868d584SAlex Deucher #endif 664*c868d584SAlex Deucher case CHIP_TONGA: 665*c868d584SAlex Deucher if (vf) 666*c868d584SAlex Deucher device_info = NULL; 667*c868d584SAlex Deucher else 668*c868d584SAlex Deucher device_info = &tonga_device_info; 669*c868d584SAlex Deucher f2g = &gfx_v8_kfd2kgd; 670*c868d584SAlex Deucher break; 671*c868d584SAlex Deucher case CHIP_FIJI: 672*c868d584SAlex Deucher if (vf) 673*c868d584SAlex Deucher device_info = &fiji_vf_device_info; 674*c868d584SAlex Deucher else 675*c868d584SAlex Deucher device_info = &fiji_device_info; 676*c868d584SAlex Deucher f2g = &gfx_v8_kfd2kgd; 677*c868d584SAlex Deucher break; 678*c868d584SAlex Deucher case CHIP_POLARIS10: 679*c868d584SAlex Deucher if (vf) 680*c868d584SAlex Deucher device_info = &polaris10_vf_device_info; 681*c868d584SAlex Deucher else 682*c868d584SAlex Deucher device_info = &polaris10_device_info; 683*c868d584SAlex Deucher f2g = &gfx_v8_kfd2kgd; 684*c868d584SAlex Deucher break; 685*c868d584SAlex Deucher case CHIP_POLARIS11: 686*c868d584SAlex Deucher if (vf) 687*c868d584SAlex Deucher device_info = NULL; 688*c868d584SAlex Deucher else 689*c868d584SAlex Deucher device_info = &polaris11_device_info; 690*c868d584SAlex Deucher f2g = &gfx_v8_kfd2kgd; 691*c868d584SAlex Deucher break; 692*c868d584SAlex Deucher case CHIP_POLARIS12: 693*c868d584SAlex Deucher if (vf) 694*c868d584SAlex Deucher device_info = NULL; 695*c868d584SAlex Deucher else 696*c868d584SAlex Deucher device_info = &polaris12_device_info; 697*c868d584SAlex Deucher f2g = &gfx_v8_kfd2kgd; 698*c868d584SAlex Deucher break; 699*c868d584SAlex Deucher case CHIP_VEGAM: 700*c868d584SAlex Deucher if (vf) 701*c868d584SAlex Deucher device_info = NULL; 702*c868d584SAlex Deucher else 703*c868d584SAlex Deucher device_info = &vegam_device_info; 704*c868d584SAlex Deucher f2g = &gfx_v8_kfd2kgd; 705*c868d584SAlex Deucher break; 706*c868d584SAlex Deucher default: 707*c868d584SAlex Deucher switch (adev->ip_versions[GC_HWIP][0]) { 708*c868d584SAlex Deucher case IP_VERSION(9, 0, 1): 709*c868d584SAlex Deucher if (vf) 710*c868d584SAlex Deucher device_info = &vega10_vf_device_info; 711*c868d584SAlex Deucher else 712*c868d584SAlex Deucher device_info = &vega10_device_info; 713*c868d584SAlex Deucher f2g = &gfx_v9_kfd2kgd; 714*c868d584SAlex Deucher break; 715*c868d584SAlex Deucher #ifdef KFD_SUPPORT_IOMMU_V2 716*c868d584SAlex Deucher case IP_VERSION(9, 1, 0): 717*c868d584SAlex Deucher case IP_VERSION(9, 2, 2): 718*c868d584SAlex Deucher if (vf) 719*c868d584SAlex Deucher device_info = NULL; 720*c868d584SAlex Deucher else 721*c868d584SAlex Deucher device_info = &raven_device_info; 722*c868d584SAlex Deucher f2g = &gfx_v9_kfd2kgd; 723*c868d584SAlex Deucher break; 724*c868d584SAlex Deucher #endif 725*c868d584SAlex Deucher case IP_VERSION(9, 2, 1): 726*c868d584SAlex Deucher if (vf) 727*c868d584SAlex Deucher device_info = NULL; 728*c868d584SAlex Deucher else 729*c868d584SAlex Deucher device_info = &vega12_device_info; 730*c868d584SAlex Deucher f2g = &gfx_v9_kfd2kgd; 731*c868d584SAlex Deucher break; 732*c868d584SAlex Deucher case IP_VERSION(9, 3, 0): 733*c868d584SAlex Deucher if (vf) 734*c868d584SAlex Deucher device_info = NULL; 735*c868d584SAlex Deucher else 736*c868d584SAlex Deucher device_info = &renoir_device_info; 737*c868d584SAlex Deucher f2g = &gfx_v9_kfd2kgd; 738*c868d584SAlex Deucher break; 739*c868d584SAlex Deucher case IP_VERSION(9, 4, 0): 740*c868d584SAlex Deucher if (vf) 741*c868d584SAlex Deucher device_info = NULL; 742*c868d584SAlex Deucher else 743*c868d584SAlex Deucher device_info = &vega20_device_info; 744*c868d584SAlex Deucher f2g = &gfx_v9_kfd2kgd; 745*c868d584SAlex Deucher break; 746*c868d584SAlex Deucher case IP_VERSION(9, 4, 1): 747*c868d584SAlex Deucher device_info = &arcturus_device_info; 748*c868d584SAlex Deucher f2g = &arcturus_kfd2kgd; 749*c868d584SAlex Deucher break; 750*c868d584SAlex Deucher case IP_VERSION(9, 4, 2): 751*c868d584SAlex Deucher device_info = &aldebaran_device_info; 752*c868d584SAlex Deucher f2g = &aldebaran_kfd2kgd; 753*c868d584SAlex Deucher break; 754*c868d584SAlex Deucher case IP_VERSION(10, 1, 10): 755*c868d584SAlex Deucher if (vf) 756*c868d584SAlex Deucher device_info = NULL; 757*c868d584SAlex Deucher else 758*c868d584SAlex Deucher device_info = &navi10_device_info; 759*c868d584SAlex Deucher f2g = &gfx_v10_kfd2kgd; 760*c868d584SAlex Deucher break; 761*c868d584SAlex Deucher case IP_VERSION(10, 1, 2): 762*c868d584SAlex Deucher device_info = &navi12_device_info; 763*c868d584SAlex Deucher f2g = &gfx_v10_kfd2kgd; 764*c868d584SAlex Deucher break; 765*c868d584SAlex Deucher case IP_VERSION(10, 1, 1): 766*c868d584SAlex Deucher if (vf) 767*c868d584SAlex Deucher device_info = NULL; 768*c868d584SAlex Deucher else 769*c868d584SAlex Deucher device_info = &navi14_device_info; 770*c868d584SAlex Deucher f2g = &gfx_v10_kfd2kgd; 771*c868d584SAlex Deucher break; 772*c868d584SAlex Deucher case IP_VERSION(10, 1, 3): 773*c868d584SAlex Deucher if (vf) 774*c868d584SAlex Deucher device_info = NULL; 775*c868d584SAlex Deucher else 776*c868d584SAlex Deucher device_info = &cyan_skillfish_device_info; 777*c868d584SAlex Deucher f2g = &gfx_v10_kfd2kgd; 778*c868d584SAlex Deucher break; 779*c868d584SAlex Deucher case IP_VERSION(10, 3, 0): 780*c868d584SAlex Deucher device_info = &sienna_cichlid_device_info; 781*c868d584SAlex Deucher f2g = &gfx_v10_3_kfd2kgd; 782*c868d584SAlex Deucher break; 783*c868d584SAlex Deucher case IP_VERSION(10, 3, 2): 784*c868d584SAlex Deucher device_info = &navy_flounder_device_info; 785*c868d584SAlex Deucher f2g = &gfx_v10_3_kfd2kgd; 786*c868d584SAlex Deucher break; 787*c868d584SAlex Deucher case IP_VERSION(10, 3, 1): 788*c868d584SAlex Deucher if (vf) 789*c868d584SAlex Deucher device_info = NULL; 790*c868d584SAlex Deucher else 791*c868d584SAlex Deucher device_info = &vangogh_device_info; 792*c868d584SAlex Deucher f2g = &gfx_v10_3_kfd2kgd; 793*c868d584SAlex Deucher break; 794*c868d584SAlex Deucher case IP_VERSION(10, 3, 4): 795*c868d584SAlex Deucher device_info = &dimgrey_cavefish_device_info; 796*c868d584SAlex Deucher f2g = &gfx_v10_3_kfd2kgd; 797*c868d584SAlex Deucher break; 798*c868d584SAlex Deucher case IP_VERSION(10, 3, 5): 799*c868d584SAlex Deucher device_info = &beige_goby_device_info; 800*c868d584SAlex Deucher f2g = &gfx_v10_3_kfd2kgd; 801*c868d584SAlex Deucher break; 802*c868d584SAlex Deucher case IP_VERSION(10, 3, 3): 803*c868d584SAlex Deucher if (vf) 804*c868d584SAlex Deucher device_info = NULL; 805*c868d584SAlex Deucher else 806*c868d584SAlex Deucher device_info = &yellow_carp_device_info; 807*c868d584SAlex Deucher f2g = &gfx_v10_3_kfd2kgd; 808*c868d584SAlex Deucher break; 809*c868d584SAlex Deucher default: 810*c868d584SAlex Deucher return NULL; 811050091abSYong Zhao } 812*c868d584SAlex Deucher break; 813*c868d584SAlex Deucher } 8144a488a7aSOded Gabbay 815aa5e899dSDan Carpenter if (!device_info || !f2g) { 816050091abSYong Zhao dev_err(kfd_device, "%s %s not supported in kfd\n", 817*c868d584SAlex Deucher amdgpu_asic_name[adev->asic_type], vf ? "VF" : ""); 8184a488a7aSOded Gabbay return NULL; 8194ebc7182SYong Zhao } 8204a488a7aSOded Gabbay 821d35f00d8SEric Huang kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); 822d35f00d8SEric Huang if (!kfd) 823d35f00d8SEric Huang return NULL; 824d35f00d8SEric Huang 8254a488a7aSOded Gabbay kfd->kgd = kgd; 8264a488a7aSOded Gabbay kfd->device_info = device_info; 8274a488a7aSOded Gabbay kfd->pdev = pdev; 82819f6d2a6SOded Gabbay kfd->init_complete = false; 829cea405b1SXihan Zhang kfd->kfd2kgd = f2g; 83043d8107fSHarish Kasiviswanathan atomic_set(&kfd->compute_profile, 0); 831cea405b1SXihan Zhang 832cea405b1SXihan Zhang mutex_init(&kfd->doorbell_mutex); 833cea405b1SXihan Zhang memset(&kfd->doorbell_available_index, 0, 834cea405b1SXihan Zhang sizeof(kfd->doorbell_available_index)); 8354a488a7aSOded Gabbay 8369b54d201SEric Huang atomic_set(&kfd->sram_ecc_flag, 0); 8379b54d201SEric Huang 83859d7115dSMukul Joshi ida_init(&kfd->doorbell_ida); 83959d7115dSMukul Joshi 8404a488a7aSOded Gabbay return kfd; 8414a488a7aSOded Gabbay } 8424a488a7aSOded Gabbay 843373d7080SFelix Kuehling static void kfd_cwsr_init(struct kfd_dev *kfd) 844373d7080SFelix Kuehling { 845373d7080SFelix Kuehling if (cwsr_enable && kfd->device_info->supports_cwsr) { 8463e76c239SFelix Kuehling if (kfd->device_info->asic_family < CHIP_VEGA10) { 847373d7080SFelix Kuehling BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); 848373d7080SFelix Kuehling kfd->cwsr_isa = cwsr_trap_gfx8_hex; 849373d7080SFelix Kuehling kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); 8500ef6845cSJay Cornwall } else if (kfd->device_info->asic_family == CHIP_ARCTURUS) { 8513baa24f0SOak Zeng BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE); 8523baa24f0SOak Zeng kfd->cwsr_isa = cwsr_trap_arcturus_hex; 8533baa24f0SOak Zeng kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex); 8540ef6845cSJay Cornwall } else if (kfd->device_info->asic_family == CHIP_ALDEBARAN) { 8550ef6845cSJay Cornwall BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE); 8560ef6845cSJay Cornwall kfd->cwsr_isa = cwsr_trap_aldebaran_hex; 8570ef6845cSJay Cornwall kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex); 85814328aa5SPhilip Cox } else if (kfd->device_info->asic_family < CHIP_NAVI10) { 8593e76c239SFelix Kuehling BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); 8603e76c239SFelix Kuehling kfd->cwsr_isa = cwsr_trap_gfx9_hex; 8613e76c239SFelix Kuehling kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); 86280b6cfedSJay Cornwall } else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) { 86380b6cfedSJay Cornwall BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE); 86480b6cfedSJay Cornwall kfd->cwsr_isa = cwsr_trap_nv1x_hex; 86580b6cfedSJay Cornwall kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex); 86614328aa5SPhilip Cox } else { 86714328aa5SPhilip Cox BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE); 86814328aa5SPhilip Cox kfd->cwsr_isa = cwsr_trap_gfx10_hex; 86914328aa5SPhilip Cox kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex); 8703e76c239SFelix Kuehling } 8713e76c239SFelix Kuehling 872373d7080SFelix Kuehling kfd->cwsr_enabled = true; 873373d7080SFelix Kuehling } 874373d7080SFelix Kuehling } 875373d7080SFelix Kuehling 87629633d0eSJoseph Greathouse static int kfd_gws_init(struct kfd_dev *kfd) 87729633d0eSJoseph Greathouse { 87829633d0eSJoseph Greathouse int ret = 0; 87929633d0eSJoseph Greathouse 88029633d0eSJoseph Greathouse if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) 88129633d0eSJoseph Greathouse return 0; 88229633d0eSJoseph Greathouse 88329633d0eSJoseph Greathouse if (hws_gws_support 884fea7d919SJoseph Greathouse || (kfd->device_info->asic_family == CHIP_VEGA10 885fea7d919SJoseph Greathouse && kfd->mec2_fw_version >= 0x81b3) 886fea7d919SJoseph Greathouse || (kfd->device_info->asic_family >= CHIP_VEGA12 88729633d0eSJoseph Greathouse && kfd->device_info->asic_family <= CHIP_RAVEN 888fea7d919SJoseph Greathouse && kfd->mec2_fw_version >= 0x1b3) 889fea7d919SJoseph Greathouse || (kfd->device_info->asic_family == CHIP_ARCTURUS 8908baa6018SHarish Kasiviswanathan && kfd->mec2_fw_version >= 0x30) 8918baa6018SHarish Kasiviswanathan || (kfd->device_info->asic_family == CHIP_ALDEBARAN 8928baa6018SHarish Kasiviswanathan && kfd->mec2_fw_version >= 0x28)) 89329633d0eSJoseph Greathouse ret = amdgpu_amdkfd_alloc_gws(kfd->kgd, 89429633d0eSJoseph Greathouse amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws); 89529633d0eSJoseph Greathouse 89629633d0eSJoseph Greathouse return ret; 89729633d0eSJoseph Greathouse } 89829633d0eSJoseph Greathouse 899938a0650SAmber Lin static void kfd_smi_init(struct kfd_dev *dev) { 900938a0650SAmber Lin INIT_LIST_HEAD(&dev->smi_clients); 901938a0650SAmber Lin spin_lock_init(&dev->smi_lock); 902938a0650SAmber Lin } 903938a0650SAmber Lin 9044a488a7aSOded Gabbay bool kgd2kfd_device_init(struct kfd_dev *kfd, 9053a0c3423SHarish Kasiviswanathan struct drm_device *ddev, 9064a488a7aSOded Gabbay const struct kgd2kfd_shared_resources *gpu_resources) 9074a488a7aSOded Gabbay { 908fd6a440eSJonathan Kim unsigned int size, map_process_packet_size; 90919f6d2a6SOded Gabbay 9103a0c3423SHarish Kasiviswanathan kfd->ddev = ddev; 9110da8b10eSAmber Lin kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, 9125ade6c9cSFelix Kuehling KGD_ENGINE_MEC1); 91329633d0eSJoseph Greathouse kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, 91429633d0eSJoseph Greathouse KGD_ENGINE_MEC2); 9150da8b10eSAmber Lin kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, 9165ade6c9cSFelix Kuehling KGD_ENGINE_SDMA1); 9174a488a7aSOded Gabbay kfd->shared_resources = *gpu_resources; 9184a488a7aSOded Gabbay 91944008d7aSYong Zhao kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; 92044008d7aSYong Zhao kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; 92144008d7aSYong Zhao kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd 92244008d7aSYong Zhao - kfd->vm_info.first_vmid_kfd + 1; 92344008d7aSYong Zhao 924e312af6cSFelix Kuehling /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. 925e312af6cSFelix Kuehling * 32 and 64-bit requests are possible and must be 926e312af6cSFelix Kuehling * supported. 927e312af6cSFelix Kuehling */ 928e312af6cSFelix Kuehling kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->kgd); 929e312af6cSFelix Kuehling if (!kfd->pci_atomic_requested && 930e312af6cSFelix Kuehling kfd->device_info->needs_pci_atomics && 931e312af6cSFelix Kuehling (!kfd->device_info->no_atomic_fw_version || 932e312af6cSFelix Kuehling kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) { 933e312af6cSFelix Kuehling dev_info(kfd_device, 934e312af6cSFelix Kuehling "skipped device %x:%x, PCI rejects atomics %d<%d\n", 935e312af6cSFelix Kuehling kfd->pdev->vendor, kfd->pdev->device, 936e312af6cSFelix Kuehling kfd->mec_fw_version, 937e312af6cSFelix Kuehling kfd->device_info->no_atomic_fw_version); 938e312af6cSFelix Kuehling return false; 939e312af6cSFelix Kuehling } 940e312af6cSFelix Kuehling 941a99c6d4fSFelix Kuehling /* Verify module parameters regarding mapped process number*/ 942a99c6d4fSFelix Kuehling if ((hws_max_conc_proc < 0) 943a99c6d4fSFelix Kuehling || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { 944a99c6d4fSFelix Kuehling dev_err(kfd_device, 945a99c6d4fSFelix Kuehling "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n", 946a99c6d4fSFelix Kuehling hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, 947a99c6d4fSFelix Kuehling kfd->vm_info.vmid_num_kfd); 948a99c6d4fSFelix Kuehling kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; 949a99c6d4fSFelix Kuehling } else 950a99c6d4fSFelix Kuehling kfd->max_proc_per_quantum = hws_max_conc_proc; 951a99c6d4fSFelix Kuehling 95219f6d2a6SOded Gabbay /* calculate max size of mqds needed for queues */ 953b8cbab04SOded Gabbay size = max_num_of_queues_per_device * 95419f6d2a6SOded Gabbay kfd->device_info->mqd_size_aligned; 95519f6d2a6SOded Gabbay 956e18e794eSOded Gabbay /* 957e18e794eSOded Gabbay * calculate max size of runlist packet. 958e18e794eSOded Gabbay * There can be only 2 packets at once 959e18e794eSOded Gabbay */ 960fd6a440eSJonathan Kim map_process_packet_size = 961fd6a440eSJonathan Kim kfd->device_info->asic_family == CHIP_ALDEBARAN ? 962fd6a440eSJonathan Kim sizeof(struct pm4_mes_map_process_aldebaran) : 963fd6a440eSJonathan Kim sizeof(struct pm4_mes_map_process); 964fd6a440eSJonathan Kim size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size + 965507968ddSFelix Kuehling max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues) 966507968ddSFelix Kuehling + sizeof(struct pm4_mes_runlist)) * 2; 967e18e794eSOded Gabbay 968e18e794eSOded Gabbay /* Add size of HIQ & DIQ */ 969e18e794eSOded Gabbay size += KFD_KERNEL_QUEUE_SIZE * 2; 970e18e794eSOded Gabbay 971e18e794eSOded Gabbay /* add another 512KB for all other allocations on gart (HPD, fences) */ 97219f6d2a6SOded Gabbay size += 512 * 1024; 97319f6d2a6SOded Gabbay 9747cd52c91SAmber Lin if (amdgpu_amdkfd_alloc_gtt_mem( 975cea405b1SXihan Zhang kfd->kgd, size, &kfd->gtt_mem, 97615426dbbSYong Zhao &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, 97715426dbbSYong Zhao false)) { 97879775b62SKent Russell dev_err(kfd_device, "Could not allocate %d bytes\n", size); 979e09d4fc8SOak Zeng goto alloc_gtt_mem_failure; 98019f6d2a6SOded Gabbay } 98119f6d2a6SOded Gabbay 98279775b62SKent Russell dev_info(kfd_device, "Allocated %d bytes on gart\n", size); 983e18e794eSOded Gabbay 98473a1da0bSOded Gabbay /* Initialize GTT sa with 512 byte chunk size */ 98573a1da0bSOded Gabbay if (kfd_gtt_sa_init(kfd, size, 512) != 0) { 98679775b62SKent Russell dev_err(kfd_device, "Error initializing gtt sub-allocator\n"); 98773a1da0bSOded Gabbay goto kfd_gtt_sa_init_error; 98873a1da0bSOded Gabbay } 98973a1da0bSOded Gabbay 990735df2baSFelix Kuehling if (kfd_doorbell_init(kfd)) { 991735df2baSFelix Kuehling dev_err(kfd_device, 992735df2baSFelix Kuehling "Error initializing doorbell aperture\n"); 993735df2baSFelix Kuehling goto kfd_doorbell_error; 994735df2baSFelix Kuehling } 99519f6d2a6SOded Gabbay 996332f6e1eSFelix Kuehling kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd); 9970c1690e3SShaoyun Liu 9989b498efaSAlex Deucher kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd); 9999b498efaSAlex Deucher 10002249d558SAndrew Lewycky if (kfd_interrupt_init(kfd)) { 100179775b62SKent Russell dev_err(kfd_device, "Error initializing interrupts\n"); 10022249d558SAndrew Lewycky goto kfd_interrupt_error; 10032249d558SAndrew Lewycky } 10042249d558SAndrew Lewycky 100564c7f8cfSBen Goz kfd->dqm = device_queue_manager_init(kfd); 100664c7f8cfSBen Goz if (!kfd->dqm) { 100779775b62SKent Russell dev_err(kfd_device, "Error initializing queue manager\n"); 100864c7f8cfSBen Goz goto device_queue_manager_error; 100964c7f8cfSBen Goz } 101064c7f8cfSBen Goz 101129633d0eSJoseph Greathouse /* If supported on this device, allocate global GWS that is shared 101229633d0eSJoseph Greathouse * by all KFD processes 101329633d0eSJoseph Greathouse */ 101429633d0eSJoseph Greathouse if (kfd_gws_init(kfd)) { 101529633d0eSJoseph Greathouse dev_err(kfd_device, "Could not allocate %d gws\n", 101629633d0eSJoseph Greathouse amdgpu_amdkfd_get_num_gws(kfd->kgd)); 101729633d0eSJoseph Greathouse goto gws_error; 101829633d0eSJoseph Greathouse } 101929633d0eSJoseph Greathouse 10206127896fSHuang Rui /* If CRAT is broken, won't set iommu enabled */ 10216127896fSHuang Rui kfd_double_confirm_iommu_support(kfd); 10226127896fSHuang Rui 102364d1c3a4SFelix Kuehling if (kfd_iommu_device_init(kfd)) { 102464d1c3a4SFelix Kuehling dev_err(kfd_device, "Error initializing iommuv2\n"); 102564d1c3a4SFelix Kuehling goto device_iommu_error; 102664c7f8cfSBen Goz } 102764c7f8cfSBen Goz 1028373d7080SFelix Kuehling kfd_cwsr_init(kfd); 1029373d7080SFelix Kuehling 1030814ab993SPhilip Yang svm_migrate_init((struct amdgpu_device *)kfd->kgd); 1031814ab993SPhilip Yang 1032b8935a7cSYong Zhao if (kfd_resume(kfd)) 1033b8935a7cSYong Zhao goto kfd_resume_error; 1034b8935a7cSYong Zhao 1035fbeb661bSYair Shachar kfd->dbgmgr = NULL; 1036fbeb661bSYair Shachar 1037465ab9e0SOak Zeng if (kfd_topology_add_device(kfd)) { 1038465ab9e0SOak Zeng dev_err(kfd_device, "Error adding device to topology\n"); 1039465ab9e0SOak Zeng goto kfd_topology_add_device_error; 1040465ab9e0SOak Zeng } 1041465ab9e0SOak Zeng 1042938a0650SAmber Lin kfd_smi_init(kfd); 1043938a0650SAmber Lin 10444a488a7aSOded Gabbay kfd->init_complete = true; 104579775b62SKent Russell dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor, 10464a488a7aSOded Gabbay kfd->pdev->device); 10474a488a7aSOded Gabbay 104879775b62SKent Russell pr_debug("Starting kfd with the following scheduling policy %d\n", 1049d146c5a7SFelix Kuehling kfd->dqm->sched_policy); 105064c7f8cfSBen Goz 105119f6d2a6SOded Gabbay goto out; 105219f6d2a6SOded Gabbay 1053465ab9e0SOak Zeng kfd_topology_add_device_error: 1054b8935a7cSYong Zhao kfd_resume_error: 105564d1c3a4SFelix Kuehling device_iommu_error: 105629633d0eSJoseph Greathouse gws_error: 105764c7f8cfSBen Goz device_queue_manager_uninit(kfd->dqm); 105864c7f8cfSBen Goz device_queue_manager_error: 10592249d558SAndrew Lewycky kfd_interrupt_exit(kfd); 10602249d558SAndrew Lewycky kfd_interrupt_error: 1061735df2baSFelix Kuehling kfd_doorbell_fini(kfd); 1062735df2baSFelix Kuehling kfd_doorbell_error: 106373a1da0bSOded Gabbay kfd_gtt_sa_fini(kfd); 106473a1da0bSOded Gabbay kfd_gtt_sa_init_error: 10657cd52c91SAmber Lin amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); 1066e09d4fc8SOak Zeng alloc_gtt_mem_failure: 106729633d0eSJoseph Greathouse if (kfd->gws) 1068e09d4fc8SOak Zeng amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); 106919f6d2a6SOded Gabbay dev_err(kfd_device, 107079775b62SKent Russell "device %x:%x NOT added due to errors\n", 107119f6d2a6SOded Gabbay kfd->pdev->vendor, kfd->pdev->device); 107219f6d2a6SOded Gabbay out: 107319f6d2a6SOded Gabbay return kfd->init_complete; 10744a488a7aSOded Gabbay } 10754a488a7aSOded Gabbay 10764a488a7aSOded Gabbay void kgd2kfd_device_exit(struct kfd_dev *kfd) 10774a488a7aSOded Gabbay { 1078b17f068aSOded Gabbay if (kfd->init_complete) { 107964c7f8cfSBen Goz device_queue_manager_uninit(kfd->dqm); 10802249d558SAndrew Lewycky kfd_interrupt_exit(kfd); 108119f6d2a6SOded Gabbay kfd_topology_remove_device(kfd); 1082735df2baSFelix Kuehling kfd_doorbell_fini(kfd); 108359d7115dSMukul Joshi ida_destroy(&kfd->doorbell_ida); 108473a1da0bSOded Gabbay kfd_gtt_sa_fini(kfd); 10857cd52c91SAmber Lin amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); 108629633d0eSJoseph Greathouse if (kfd->gws) 1087e09d4fc8SOak Zeng amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); 1088b17f068aSOded Gabbay } 10895b5c4e40SEvgeny Pinchuk 10904a488a7aSOded Gabbay kfree(kfd); 10914a488a7aSOded Gabbay } 10924a488a7aSOded Gabbay 1093e3b7a967SShaoyun Liu int kgd2kfd_pre_reset(struct kfd_dev *kfd) 1094e3b7a967SShaoyun Liu { 1095e42051d2SShaoyun Liu if (!kfd->init_complete) 1096e42051d2SShaoyun Liu return 0; 109709c34e8dSFelix Kuehling 109855977744SMukul Joshi kfd_smi_event_update_gpu_reset(kfd, false); 109955977744SMukul Joshi 110009c34e8dSFelix Kuehling kfd->dqm->ops.pre_reset(kfd->dqm); 110109c34e8dSFelix Kuehling 11029593f4d6SRajneesh Bhardwaj kgd2kfd_suspend(kfd, false); 1103e42051d2SShaoyun Liu 1104e42051d2SShaoyun Liu kfd_signal_reset_event(kfd); 1105e3b7a967SShaoyun Liu return 0; 1106e3b7a967SShaoyun Liu } 1107e3b7a967SShaoyun Liu 1108e42051d2SShaoyun Liu /* 1109e42051d2SShaoyun Liu * Fix me. KFD won't be able to resume existing process for now. 1110e42051d2SShaoyun Liu * We will keep all existing process in a evicted state and 1111e42051d2SShaoyun Liu * wait the process to be terminated. 1112e42051d2SShaoyun Liu */ 1113e42051d2SShaoyun Liu 1114e3b7a967SShaoyun Liu int kgd2kfd_post_reset(struct kfd_dev *kfd) 1115e3b7a967SShaoyun Liu { 1116a1bd079fSyu kuai int ret; 1117e42051d2SShaoyun Liu 1118e42051d2SShaoyun Liu if (!kfd->init_complete) 1119e3b7a967SShaoyun Liu return 0; 1120e42051d2SShaoyun Liu 1121e42051d2SShaoyun Liu ret = kfd_resume(kfd); 1122e42051d2SShaoyun Liu if (ret) 1123e42051d2SShaoyun Liu return ret; 1124a1bd079fSyu kuai atomic_dec(&kfd_locked); 11259b54d201SEric Huang 11269b54d201SEric Huang atomic_set(&kfd->sram_ecc_flag, 0); 11279b54d201SEric Huang 112855977744SMukul Joshi kfd_smi_event_update_gpu_reset(kfd, true); 112955977744SMukul Joshi 1130e42051d2SShaoyun Liu return 0; 1131e42051d2SShaoyun Liu } 1132e42051d2SShaoyun Liu 1133e42051d2SShaoyun Liu bool kfd_is_locked(void) 1134e42051d2SShaoyun Liu { 1135e42051d2SShaoyun Liu return (atomic_read(&kfd_locked) > 0); 1136e3b7a967SShaoyun Liu } 1137e3b7a967SShaoyun Liu 11389593f4d6SRajneesh Bhardwaj void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) 11394a488a7aSOded Gabbay { 1140733fa1f7SYong Zhao if (!kfd->init_complete) 1141733fa1f7SYong Zhao return; 1142733fa1f7SYong Zhao 11439593f4d6SRajneesh Bhardwaj /* for runtime suspend, skip locking kfd */ 11449593f4d6SRajneesh Bhardwaj if (!run_pm) { 114526103436SFelix Kuehling /* For first KFD device suspend all the KFD processes */ 1146e42051d2SShaoyun Liu if (atomic_inc_return(&kfd_locked) == 1) 114726103436SFelix Kuehling kfd_suspend_all_processes(); 11489593f4d6SRajneesh Bhardwaj } 114926103436SFelix Kuehling 115045c9a5e4SOded Gabbay kfd->dqm->ops.stop(kfd->dqm); 115164d1c3a4SFelix Kuehling kfd_iommu_suspend(kfd); 11524a488a7aSOded Gabbay } 11534a488a7aSOded Gabbay 11549593f4d6SRajneesh Bhardwaj int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) 11554a488a7aSOded Gabbay { 115626103436SFelix Kuehling int ret, count; 115726103436SFelix Kuehling 1158b8935a7cSYong Zhao if (!kfd->init_complete) 1159b8935a7cSYong Zhao return 0; 1160b17f068aSOded Gabbay 116126103436SFelix Kuehling ret = kfd_resume(kfd); 116226103436SFelix Kuehling if (ret) 116326103436SFelix Kuehling return ret; 1164b17f068aSOded Gabbay 11659593f4d6SRajneesh Bhardwaj /* for runtime resume, skip unlocking kfd */ 11669593f4d6SRajneesh Bhardwaj if (!run_pm) { 1167e42051d2SShaoyun Liu count = atomic_dec_return(&kfd_locked); 116826103436SFelix Kuehling WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); 116926103436SFelix Kuehling if (count == 0) 117026103436SFelix Kuehling ret = kfd_resume_all_processes(); 11719593f4d6SRajneesh Bhardwaj } 117226103436SFelix Kuehling 117326103436SFelix Kuehling return ret; 11744ebc7182SYong Zhao } 11754ebc7182SYong Zhao 1176f8846323SJames Zhu int kgd2kfd_resume_iommu(struct kfd_dev *kfd) 1177b8935a7cSYong Zhao { 1178b8935a7cSYong Zhao int err = 0; 1179b8935a7cSYong Zhao 118064d1c3a4SFelix Kuehling err = kfd_iommu_resume(kfd); 1181f8846323SJames Zhu if (err) 118264d1c3a4SFelix Kuehling dev_err(kfd_device, 118364d1c3a4SFelix Kuehling "Failed to resume IOMMU for device %x:%x\n", 118464d1c3a4SFelix Kuehling kfd->pdev->vendor, kfd->pdev->device); 118564d1c3a4SFelix Kuehling return err; 118664d1c3a4SFelix Kuehling } 1187733fa1f7SYong Zhao 1188f8846323SJames Zhu static int kfd_resume(struct kfd_dev *kfd) 1189f8846323SJames Zhu { 1190f8846323SJames Zhu int err = 0; 1191f8846323SJames Zhu 1192b8935a7cSYong Zhao err = kfd->dqm->ops.start(kfd->dqm); 1193b8935a7cSYong Zhao if (err) { 1194b8935a7cSYong Zhao dev_err(kfd_device, 1195b8935a7cSYong Zhao "Error starting queue manager for device %x:%x\n", 1196b8935a7cSYong Zhao kfd->pdev->vendor, kfd->pdev->device); 1197b8935a7cSYong Zhao goto dqm_start_error; 1198b17f068aSOded Gabbay } 1199b17f068aSOded Gabbay 1200b8935a7cSYong Zhao return err; 1201b8935a7cSYong Zhao 1202b8935a7cSYong Zhao dqm_start_error: 120364d1c3a4SFelix Kuehling kfd_iommu_suspend(kfd); 1204b8935a7cSYong Zhao return err; 12054a488a7aSOded Gabbay } 12064a488a7aSOded Gabbay 1207b3eca59dSPhilip Yang static inline void kfd_queue_work(struct workqueue_struct *wq, 1208b3eca59dSPhilip Yang struct work_struct *work) 1209b3eca59dSPhilip Yang { 1210b3eca59dSPhilip Yang int cpu, new_cpu; 1211b3eca59dSPhilip Yang 1212b3eca59dSPhilip Yang cpu = new_cpu = smp_processor_id(); 1213b3eca59dSPhilip Yang do { 1214b3eca59dSPhilip Yang new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids; 1215b3eca59dSPhilip Yang if (cpu_to_node(new_cpu) == numa_node_id()) 1216b3eca59dSPhilip Yang break; 1217b3eca59dSPhilip Yang } while (cpu != new_cpu); 1218b3eca59dSPhilip Yang 1219b3eca59dSPhilip Yang queue_work_on(new_cpu, wq, work); 1220b3eca59dSPhilip Yang } 1221b3eca59dSPhilip Yang 1222b3f5e6b4SAndrew Lewycky /* This is called directly from KGD at ISR. */ 1223b3f5e6b4SAndrew Lewycky void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) 12244a488a7aSOded Gabbay { 122558e69886SLan Xiao uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE]; 122658e69886SLan Xiao bool is_patched = false; 12272383a767SChristian König unsigned long flags; 122858e69886SLan Xiao 12292249d558SAndrew Lewycky if (!kfd->init_complete) 12302249d558SAndrew Lewycky return; 12312249d558SAndrew Lewycky 123258e69886SLan Xiao if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) { 123358e69886SLan Xiao dev_err_once(kfd_device, "Ring entry too small\n"); 123458e69886SLan Xiao return; 123558e69886SLan Xiao } 123658e69886SLan Xiao 12372383a767SChristian König spin_lock_irqsave(&kfd->interrupt_lock, flags); 12382249d558SAndrew Lewycky 12392249d558SAndrew Lewycky if (kfd->interrupts_active 124058e69886SLan Xiao && interrupt_is_wanted(kfd, ih_ring_entry, 124158e69886SLan Xiao patched_ihre, &is_patched) 124258e69886SLan Xiao && enqueue_ih_ring_entry(kfd, 124358e69886SLan Xiao is_patched ? patched_ihre : ih_ring_entry)) 1244b3eca59dSPhilip Yang kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work); 12452249d558SAndrew Lewycky 12462383a767SChristian König spin_unlock_irqrestore(&kfd->interrupt_lock, flags); 12474a488a7aSOded Gabbay } 12486e81090bSOded Gabbay 12496b95e797SFelix Kuehling int kgd2kfd_quiesce_mm(struct mm_struct *mm) 12506b95e797SFelix Kuehling { 12516b95e797SFelix Kuehling struct kfd_process *p; 12526b95e797SFelix Kuehling int r; 12536b95e797SFelix Kuehling 12546b95e797SFelix Kuehling /* Because we are called from arbitrary context (workqueue) as opposed 12556b95e797SFelix Kuehling * to process context, kfd_process could attempt to exit while we are 12566b95e797SFelix Kuehling * running so the lookup function increments the process ref count. 12576b95e797SFelix Kuehling */ 12586b95e797SFelix Kuehling p = kfd_lookup_process_by_mm(mm); 12596b95e797SFelix Kuehling if (!p) 12606b95e797SFelix Kuehling return -ESRCH; 12616b95e797SFelix Kuehling 1262b2057956SFelix Kuehling WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 12636b95e797SFelix Kuehling r = kfd_process_evict_queues(p); 12646b95e797SFelix Kuehling 12656b95e797SFelix Kuehling kfd_unref_process(p); 12666b95e797SFelix Kuehling return r; 12676b95e797SFelix Kuehling } 12686b95e797SFelix Kuehling 12696b95e797SFelix Kuehling int kgd2kfd_resume_mm(struct mm_struct *mm) 12706b95e797SFelix Kuehling { 12716b95e797SFelix Kuehling struct kfd_process *p; 12726b95e797SFelix Kuehling int r; 12736b95e797SFelix Kuehling 12746b95e797SFelix Kuehling /* Because we are called from arbitrary context (workqueue) as opposed 12756b95e797SFelix Kuehling * to process context, kfd_process could attempt to exit while we are 12766b95e797SFelix Kuehling * running so the lookup function increments the process ref count. 12776b95e797SFelix Kuehling */ 12786b95e797SFelix Kuehling p = kfd_lookup_process_by_mm(mm); 12796b95e797SFelix Kuehling if (!p) 12806b95e797SFelix Kuehling return -ESRCH; 12816b95e797SFelix Kuehling 12826b95e797SFelix Kuehling r = kfd_process_restore_queues(p); 12836b95e797SFelix Kuehling 12846b95e797SFelix Kuehling kfd_unref_process(p); 12856b95e797SFelix Kuehling return r; 12866b95e797SFelix Kuehling } 12876b95e797SFelix Kuehling 128826103436SFelix Kuehling /** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will 128926103436SFelix Kuehling * prepare for safe eviction of KFD BOs that belong to the specified 129026103436SFelix Kuehling * process. 129126103436SFelix Kuehling * 129226103436SFelix Kuehling * @mm: mm_struct that identifies the specified KFD process 129326103436SFelix Kuehling * @fence: eviction fence attached to KFD process BOs 129426103436SFelix Kuehling * 129526103436SFelix Kuehling */ 129626103436SFelix Kuehling int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, 129726103436SFelix Kuehling struct dma_fence *fence) 129826103436SFelix Kuehling { 129926103436SFelix Kuehling struct kfd_process *p; 130026103436SFelix Kuehling unsigned long active_time; 130126103436SFelix Kuehling unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS); 130226103436SFelix Kuehling 130326103436SFelix Kuehling if (!fence) 130426103436SFelix Kuehling return -EINVAL; 130526103436SFelix Kuehling 130626103436SFelix Kuehling if (dma_fence_is_signaled(fence)) 130726103436SFelix Kuehling return 0; 130826103436SFelix Kuehling 130926103436SFelix Kuehling p = kfd_lookup_process_by_mm(mm); 131026103436SFelix Kuehling if (!p) 131126103436SFelix Kuehling return -ENODEV; 131226103436SFelix Kuehling 131326103436SFelix Kuehling if (fence->seqno == p->last_eviction_seqno) 131426103436SFelix Kuehling goto out; 131526103436SFelix Kuehling 131626103436SFelix Kuehling p->last_eviction_seqno = fence->seqno; 131726103436SFelix Kuehling 131826103436SFelix Kuehling /* Avoid KFD process starvation. Wait for at least 131926103436SFelix Kuehling * PROCESS_ACTIVE_TIME_MS before evicting the process again 132026103436SFelix Kuehling */ 132126103436SFelix Kuehling active_time = get_jiffies_64() - p->last_restore_timestamp; 132226103436SFelix Kuehling if (delay_jiffies > active_time) 132326103436SFelix Kuehling delay_jiffies -= active_time; 132426103436SFelix Kuehling else 132526103436SFelix Kuehling delay_jiffies = 0; 132626103436SFelix Kuehling 132726103436SFelix Kuehling /* During process initialization eviction_work.dwork is initialized 132826103436SFelix Kuehling * to kfd_evict_bo_worker 132926103436SFelix Kuehling */ 1330b2057956SFelix Kuehling WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies", 1331b2057956SFelix Kuehling p->lead_thread->pid, delay_jiffies); 133226103436SFelix Kuehling schedule_delayed_work(&p->eviction_work, delay_jiffies); 133326103436SFelix Kuehling out: 133426103436SFelix Kuehling kfd_unref_process(p); 133526103436SFelix Kuehling return 0; 133626103436SFelix Kuehling } 133726103436SFelix Kuehling 13386e81090bSOded Gabbay static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, 13396e81090bSOded Gabbay unsigned int chunk_size) 13406e81090bSOded Gabbay { 13418625ff9cSFelix Kuehling unsigned int num_of_longs; 13426e81090bSOded Gabbay 134332fa8219SFelix Kuehling if (WARN_ON(buf_size < chunk_size)) 134432fa8219SFelix Kuehling return -EINVAL; 134532fa8219SFelix Kuehling if (WARN_ON(buf_size == 0)) 134632fa8219SFelix Kuehling return -EINVAL; 134732fa8219SFelix Kuehling if (WARN_ON(chunk_size == 0)) 134832fa8219SFelix Kuehling return -EINVAL; 13496e81090bSOded Gabbay 13506e81090bSOded Gabbay kfd->gtt_sa_chunk_size = chunk_size; 13516e81090bSOded Gabbay kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; 13526e81090bSOded Gabbay 13538625ff9cSFelix Kuehling num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) / 13548625ff9cSFelix Kuehling BITS_PER_LONG; 13556e81090bSOded Gabbay 13568625ff9cSFelix Kuehling kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL); 13576e81090bSOded Gabbay 13586e81090bSOded Gabbay if (!kfd->gtt_sa_bitmap) 13596e81090bSOded Gabbay return -ENOMEM; 13606e81090bSOded Gabbay 136179775b62SKent Russell pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n", 13626e81090bSOded Gabbay kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap); 13636e81090bSOded Gabbay 13646e81090bSOded Gabbay mutex_init(&kfd->gtt_sa_lock); 13656e81090bSOded Gabbay 13666e81090bSOded Gabbay return 0; 13676e81090bSOded Gabbay 13686e81090bSOded Gabbay } 13696e81090bSOded Gabbay 13706e81090bSOded Gabbay static void kfd_gtt_sa_fini(struct kfd_dev *kfd) 13716e81090bSOded Gabbay { 13726e81090bSOded Gabbay mutex_destroy(&kfd->gtt_sa_lock); 13736e81090bSOded Gabbay kfree(kfd->gtt_sa_bitmap); 13746e81090bSOded Gabbay } 13756e81090bSOded Gabbay 13766e81090bSOded Gabbay static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr, 13776e81090bSOded Gabbay unsigned int bit_num, 13786e81090bSOded Gabbay unsigned int chunk_size) 13796e81090bSOded Gabbay { 13806e81090bSOded Gabbay return start_addr + bit_num * chunk_size; 13816e81090bSOded Gabbay } 13826e81090bSOded Gabbay 13836e81090bSOded Gabbay static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr, 13846e81090bSOded Gabbay unsigned int bit_num, 13856e81090bSOded Gabbay unsigned int chunk_size) 13866e81090bSOded Gabbay { 13876e81090bSOded Gabbay return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size); 13886e81090bSOded Gabbay } 13896e81090bSOded Gabbay 13906e81090bSOded Gabbay int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, 13916e81090bSOded Gabbay struct kfd_mem_obj **mem_obj) 13926e81090bSOded Gabbay { 13936e81090bSOded Gabbay unsigned int found, start_search, cur_size; 13946e81090bSOded Gabbay 13956e81090bSOded Gabbay if (size == 0) 13966e81090bSOded Gabbay return -EINVAL; 13976e81090bSOded Gabbay 13986e81090bSOded Gabbay if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) 13996e81090bSOded Gabbay return -ENOMEM; 14006e81090bSOded Gabbay 14011cd106ecSFelix Kuehling *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); 14021cd106ecSFelix Kuehling if (!(*mem_obj)) 14036e81090bSOded Gabbay return -ENOMEM; 14046e81090bSOded Gabbay 140579775b62SKent Russell pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size); 14066e81090bSOded Gabbay 14076e81090bSOded Gabbay start_search = 0; 14086e81090bSOded Gabbay 14096e81090bSOded Gabbay mutex_lock(&kfd->gtt_sa_lock); 14106e81090bSOded Gabbay 14116e81090bSOded Gabbay kfd_gtt_restart_search: 14126e81090bSOded Gabbay /* Find the first chunk that is free */ 14136e81090bSOded Gabbay found = find_next_zero_bit(kfd->gtt_sa_bitmap, 14146e81090bSOded Gabbay kfd->gtt_sa_num_of_chunks, 14156e81090bSOded Gabbay start_search); 14166e81090bSOded Gabbay 141779775b62SKent Russell pr_debug("Found = %d\n", found); 14186e81090bSOded Gabbay 14196e81090bSOded Gabbay /* If there wasn't any free chunk, bail out */ 14206e81090bSOded Gabbay if (found == kfd->gtt_sa_num_of_chunks) 14216e81090bSOded Gabbay goto kfd_gtt_no_free_chunk; 14226e81090bSOded Gabbay 14236e81090bSOded Gabbay /* Update fields of mem_obj */ 14246e81090bSOded Gabbay (*mem_obj)->range_start = found; 14256e81090bSOded Gabbay (*mem_obj)->range_end = found; 14266e81090bSOded Gabbay (*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr( 14276e81090bSOded Gabbay kfd->gtt_start_gpu_addr, 14286e81090bSOded Gabbay found, 14296e81090bSOded Gabbay kfd->gtt_sa_chunk_size); 14306e81090bSOded Gabbay (*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr( 14316e81090bSOded Gabbay kfd->gtt_start_cpu_ptr, 14326e81090bSOded Gabbay found, 14336e81090bSOded Gabbay kfd->gtt_sa_chunk_size); 14346e81090bSOded Gabbay 143579775b62SKent Russell pr_debug("gpu_addr = %p, cpu_addr = %p\n", 14366e81090bSOded Gabbay (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr); 14376e81090bSOded Gabbay 14386e81090bSOded Gabbay /* If we need only one chunk, mark it as allocated and get out */ 14396e81090bSOded Gabbay if (size <= kfd->gtt_sa_chunk_size) { 144079775b62SKent Russell pr_debug("Single bit\n"); 14416e81090bSOded Gabbay set_bit(found, kfd->gtt_sa_bitmap); 14426e81090bSOded Gabbay goto kfd_gtt_out; 14436e81090bSOded Gabbay } 14446e81090bSOded Gabbay 14456e81090bSOded Gabbay /* Otherwise, try to see if we have enough contiguous chunks */ 14466e81090bSOded Gabbay cur_size = size - kfd->gtt_sa_chunk_size; 14476e81090bSOded Gabbay do { 14486e81090bSOded Gabbay (*mem_obj)->range_end = 14496e81090bSOded Gabbay find_next_zero_bit(kfd->gtt_sa_bitmap, 14506e81090bSOded Gabbay kfd->gtt_sa_num_of_chunks, ++found); 14516e81090bSOded Gabbay /* 14526e81090bSOded Gabbay * If next free chunk is not contiguous than we need to 14536e81090bSOded Gabbay * restart our search from the last free chunk we found (which 14546e81090bSOded Gabbay * wasn't contiguous to the previous ones 14556e81090bSOded Gabbay */ 14566e81090bSOded Gabbay if ((*mem_obj)->range_end != found) { 14576e81090bSOded Gabbay start_search = found; 14586e81090bSOded Gabbay goto kfd_gtt_restart_search; 14596e81090bSOded Gabbay } 14606e81090bSOded Gabbay 14616e81090bSOded Gabbay /* 14626e81090bSOded Gabbay * If we reached end of buffer, bail out with error 14636e81090bSOded Gabbay */ 14646e81090bSOded Gabbay if (found == kfd->gtt_sa_num_of_chunks) 14656e81090bSOded Gabbay goto kfd_gtt_no_free_chunk; 14666e81090bSOded Gabbay 14676e81090bSOded Gabbay /* Check if we don't need another chunk */ 14686e81090bSOded Gabbay if (cur_size <= kfd->gtt_sa_chunk_size) 14696e81090bSOded Gabbay cur_size = 0; 14706e81090bSOded Gabbay else 14716e81090bSOded Gabbay cur_size -= kfd->gtt_sa_chunk_size; 14726e81090bSOded Gabbay 14736e81090bSOded Gabbay } while (cur_size > 0); 14746e81090bSOded Gabbay 147579775b62SKent Russell pr_debug("range_start = %d, range_end = %d\n", 14766e81090bSOded Gabbay (*mem_obj)->range_start, (*mem_obj)->range_end); 14776e81090bSOded Gabbay 14786e81090bSOded Gabbay /* Mark the chunks as allocated */ 14796e81090bSOded Gabbay for (found = (*mem_obj)->range_start; 14806e81090bSOded Gabbay found <= (*mem_obj)->range_end; 14816e81090bSOded Gabbay found++) 14826e81090bSOded Gabbay set_bit(found, kfd->gtt_sa_bitmap); 14836e81090bSOded Gabbay 14846e81090bSOded Gabbay kfd_gtt_out: 14856e81090bSOded Gabbay mutex_unlock(&kfd->gtt_sa_lock); 14866e81090bSOded Gabbay return 0; 14876e81090bSOded Gabbay 14886e81090bSOded Gabbay kfd_gtt_no_free_chunk: 14893148a6a0SJack Zhang pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj); 14906e81090bSOded Gabbay mutex_unlock(&kfd->gtt_sa_lock); 14913148a6a0SJack Zhang kfree(*mem_obj); 14926e81090bSOded Gabbay return -ENOMEM; 14936e81090bSOded Gabbay } 14946e81090bSOded Gabbay 14956e81090bSOded Gabbay int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) 14966e81090bSOded Gabbay { 14976e81090bSOded Gabbay unsigned int bit; 14986e81090bSOded Gabbay 14999216ed29SOded Gabbay /* Act like kfree when trying to free a NULL object */ 15009216ed29SOded Gabbay if (!mem_obj) 15019216ed29SOded Gabbay return 0; 15026e81090bSOded Gabbay 150379775b62SKent Russell pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n", 15046e81090bSOded Gabbay mem_obj, mem_obj->range_start, mem_obj->range_end); 15056e81090bSOded Gabbay 15066e81090bSOded Gabbay mutex_lock(&kfd->gtt_sa_lock); 15076e81090bSOded Gabbay 15086e81090bSOded Gabbay /* Mark the chunks as free */ 15096e81090bSOded Gabbay for (bit = mem_obj->range_start; 15106e81090bSOded Gabbay bit <= mem_obj->range_end; 15116e81090bSOded Gabbay bit++) 15126e81090bSOded Gabbay clear_bit(bit, kfd->gtt_sa_bitmap); 15136e81090bSOded Gabbay 15146e81090bSOded Gabbay mutex_unlock(&kfd->gtt_sa_lock); 15156e81090bSOded Gabbay 15166e81090bSOded Gabbay kfree(mem_obj); 15176e81090bSOded Gabbay return 0; 15186e81090bSOded Gabbay } 1519a29ec470SShaoyun Liu 15209b54d201SEric Huang void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) 15219b54d201SEric Huang { 15229b54d201SEric Huang if (kfd) 15239b54d201SEric Huang atomic_inc(&kfd->sram_ecc_flag); 15249b54d201SEric Huang } 15259b54d201SEric Huang 152643d8107fSHarish Kasiviswanathan void kfd_inc_compute_active(struct kfd_dev *kfd) 152743d8107fSHarish Kasiviswanathan { 152843d8107fSHarish Kasiviswanathan if (atomic_inc_return(&kfd->compute_profile) == 1) 152943d8107fSHarish Kasiviswanathan amdgpu_amdkfd_set_compute_idle(kfd->kgd, false); 153043d8107fSHarish Kasiviswanathan } 153143d8107fSHarish Kasiviswanathan 153243d8107fSHarish Kasiviswanathan void kfd_dec_compute_active(struct kfd_dev *kfd) 153343d8107fSHarish Kasiviswanathan { 153443d8107fSHarish Kasiviswanathan int count = atomic_dec_return(&kfd->compute_profile); 153543d8107fSHarish Kasiviswanathan 153643d8107fSHarish Kasiviswanathan if (count == 0) 153743d8107fSHarish Kasiviswanathan amdgpu_amdkfd_set_compute_idle(kfd->kgd, true); 153843d8107fSHarish Kasiviswanathan WARN_ONCE(count < 0, "Compute profile ref. count error"); 153943d8107fSHarish Kasiviswanathan } 154043d8107fSHarish Kasiviswanathan 1541410e302eSGraham Sider void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) 15422c2b0d88SMukul Joshi { 1543158fc08dSAmber Lin if (kfd && kfd->init_complete) 15442c2b0d88SMukul Joshi kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask); 15452c2b0d88SMukul Joshi } 15462c2b0d88SMukul Joshi 1547a29ec470SShaoyun Liu #if defined(CONFIG_DEBUG_FS) 1548a29ec470SShaoyun Liu 1549a29ec470SShaoyun Liu /* This function will send a package to HIQ to hang the HWS 1550a29ec470SShaoyun Liu * which will trigger a GPU reset and bring the HWS back to normal state 1551a29ec470SShaoyun Liu */ 1552a29ec470SShaoyun Liu int kfd_debugfs_hang_hws(struct kfd_dev *dev) 1553a29ec470SShaoyun Liu { 1554a29ec470SShaoyun Liu if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) { 1555a29ec470SShaoyun Liu pr_err("HWS is not enabled"); 1556a29ec470SShaoyun Liu return -EINVAL; 1557a29ec470SShaoyun Liu } 1558a29ec470SShaoyun Liu 15594f942aaeSOak Zeng return dqm_debugfs_hang_hws(dev->dqm); 1560a29ec470SShaoyun Liu } 1561a29ec470SShaoyun Liu 1562a29ec470SShaoyun Liu #endif 1563