// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_headers_aldebaran.h"
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
#include "kfd_migrate.h"
#include "amdgpu.h"

#define MQD_SIZE_ALIGNED 768

/*
 * kfd_locked is used to lock the KFD driver during suspend or reset.
 * Once locked, the driver stops any further GPU execution and process
 * creation (open) returns -EAGAIN.
 */
static atomic_t kfd_locked = ATOMIC_INIT(0);

#ifdef CONFIG_DRM_AMDGPU_CIK
extern const struct kfd2kgd_calls gfx_v7_kfd2kgd;
#endif
extern const struct kfd2kgd_calls gfx_v8_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v9_kfd2kgd;
extern const struct kfd2kgd_calls arcturus_kfd2kgd;
extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
extern const struct kfd2kgd_calls gc_9_4_3_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v11_kfd2kgd;

static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);

static int kfd_resume_iommu(struct kfd_dev *kfd);
static int kfd_resume(struct kfd_node *kfd);

static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
{
	uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];

	switch (sdma_version) {
	case IP_VERSION(4, 0, 0):/* VEGA10 */
	case IP_VERSION(4, 0, 1):/* VEGA12 */
	case IP_VERSION(4, 1, 0):/* RAVEN */
	case IP_VERSION(4, 1, 1):/* RAVEN */
	case IP_VERSION(4, 1, 2):/* RENOIR */
	case IP_VERSION(5, 2, 1):/* VANGOGH */
	case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
	case IP_VERSION(5, 2, 6):/* GC 10.3.6 */
	case IP_VERSION(5, 2, 7):/* GC 10.3.7 */
		kfd->device_info.num_sdma_queues_per_engine = 2;
		break;
	case IP_VERSION(4, 2, 0):/* VEGA20 */
	case IP_VERSION(4, 2, 2):/* ARCTURUS */
	case IP_VERSION(4, 4, 0):/* ALDEBARAN */
	case IP_VERSION(4, 4, 2):
	case IP_VERSION(5, 0, 0):/* NAVI10 */
	case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
	case IP_VERSION(5, 0, 2):/* NAVI14 */
	case IP_VERSION(5, 0, 5):/* NAVI12 */
	case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
	case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */
	case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
	case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */
	case IP_VERSION(6, 0, 0):
	case IP_VERSION(6, 0, 1):
	case IP_VERSION(6, 0, 2):
	case IP_VERSION(6, 0, 3):
		kfd->device_info.num_sdma_queues_per_engine = 8;
		break;
	default:
		dev_warn(kfd_device,
			"Default sdma queue per engine(8) is set due to mismatch of sdma ip block(SDMA_HWIP:0x%x).\n",
			sdma_version);
		kfd->device_info.num_sdma_queues_per_engine = 8;
	}

	switch (sdma_version) {
	case IP_VERSION(6, 0, 0):
	case IP_VERSION(6, 0, 2):
	case IP_VERSION(6, 0, 3):
		/* Reserve 1 for paging and 1 for gfx */
		kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
		/* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */
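		/* e.g. 0xF below reserves queues 0 and 1 on both engine 0 and engine 1 */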
		kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL;
		break;
	case IP_VERSION(6, 0, 1):
		/* Reserve 1 for paging and 1 for gfx */
		kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
		/* BIT(0)=engine-0 queue-0; BIT(1)=engine-0 queue-1; ... */
		kfd->device_info.reserved_sdma_queues_bitmap = 0x3ULL;
		break;
	default:
		break;
	}
}

static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
{
	uint32_t gc_version = KFD_GC_VERSION(kfd);

	switch (gc_version) {
	case IP_VERSION(9, 0, 1): /* VEGA10 */
	case IP_VERSION(9, 1, 0): /* RAVEN */
	case IP_VERSION(9, 2, 1): /* VEGA12 */
	case IP_VERSION(9, 2, 2): /* RAVEN */
	case IP_VERSION(9, 3, 0): /* RENOIR */
	case IP_VERSION(9, 4, 0): /* VEGA20 */
	case IP_VERSION(9, 4, 1): /* ARCTURUS */
	case IP_VERSION(9, 4, 2): /* ALDEBARAN */
	case IP_VERSION(10, 3, 1): /* VANGOGH */
	case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
	case IP_VERSION(10, 3, 6): /* GC 10.3.6 */
	case IP_VERSION(10, 3, 7): /* GC 10.3.7 */
	case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
	case IP_VERSION(10, 1, 4):
	case IP_VERSION(10, 1, 10): /* NAVI10 */
	case IP_VERSION(10, 1, 2): /* NAVI12 */
	case IP_VERSION(10, 1, 1): /* NAVI14 */
	case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */
	case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */
	case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */
	case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
		kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
		break;
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
	case IP_VERSION(11, 0, 4):
		kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
		break;
	default:
		dev_warn(kfd_device, "v9 event interrupt handler is set due to "
			"mismatch of gc ip block(GC_HWIP:0x%x).\n", gc_version);
		kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
	}
}

static void kfd_device_info_init(struct kfd_dev *kfd,
				 bool vf, uint32_t gfx_target_version)
{
	uint32_t gc_version = KFD_GC_VERSION(kfd);
	uint32_t asic_type = kfd->adev->asic_type;

	kfd->device_info.max_pasid_bits = 16;
	kfd->device_info.max_no_of_hqd = 24;
	kfd->device_info.num_of_watch_points = 4;
	kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED;
	kfd->device_info.gfx_target_version = gfx_target_version;

	if (KFD_IS_SOC15(kfd)) {
		kfd->device_info.doorbell_size = 8;
		kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
		kfd->device_info.supports_cwsr = true;

		kfd_device_info_set_sdma_info(kfd);

		kfd_device_info_set_event_interrupt_class(kfd);

		/* Raven */
		if (gc_version == IP_VERSION(9, 1, 0) ||
		    gc_version == IP_VERSION(9, 2, 2))
			kfd->device_info.needs_iommu_device = true;

		if (gc_version < IP_VERSION(11, 0, 0)) {
			/* Navi2x+, Navi1x+ */
			if (gc_version == IP_VERSION(10, 3, 6))
				kfd->device_info.no_atomic_fw_version = 14;
			else if (gc_version == IP_VERSION(10, 3, 7))
				kfd->device_info.no_atomic_fw_version = 3;
			else if (gc_version >= IP_VERSION(10, 3, 0))
				kfd->device_info.no_atomic_fw_version = 92;
			else if (gc_version >= IP_VERSION(10, 1, 1))
				kfd->device_info.no_atomic_fw_version = 145;

			/* Navi1x+ */
			if (gc_version >= IP_VERSION(10, 1, 1))
				kfd->device_info.needs_pci_atomics = true;
		} else if (gc_version < IP_VERSION(12, 0, 0)) {
			/*
			 * PCIe atomics support acknowledgment in GFX11 RS64 CPFW requires
			 * MEC version >= 509. Prior RS64 CPFW versions (and all F32) require
			 * PCIe atomics support.
			 */
			kfd->device_info.needs_pci_atomics = true;
			kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0;
		}
	} else {
		kfd->device_info.doorbell_size = 4;
		kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t);
		kfd->device_info.event_interrupt_class = &event_interrupt_class_cik;
		kfd->device_info.num_sdma_queues_per_engine = 2;

		if (asic_type != CHIP_KAVERI &&
		    asic_type != CHIP_HAWAII &&
		    asic_type != CHIP_TONGA)
			kfd->device_info.supports_cwsr = true;

		if (asic_type == CHIP_KAVERI ||
		    asic_type == CHIP_CARRIZO)
			kfd->device_info.needs_iommu_device = true;

		if (asic_type != CHIP_HAWAII && !vf)
			kfd->device_info.needs_pci_atomics = true;
	}
}

struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
{
	struct kfd_dev *kfd = NULL;
	const struct kfd2kgd_calls *f2g = NULL;
	uint32_t gfx_target_version = 0;

	switch (adev->asic_type) {
#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_KAVERI:
		gfx_target_version = 70000;
		if (!vf)
			f2g = &gfx_v7_kfd2kgd;
		break;
#endif
	case CHIP_CARRIZO:
		gfx_target_version = 80001;
		if (!vf)
			f2g = &gfx_v8_kfd2kgd;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_HAWAII:
		gfx_target_version = 70001;
		if (!amdgpu_exp_hw_support)
			pr_info(
	"KFD support on Hawaii is experimental. See modparam exp_hw_support\n"
				);
		else if (!vf)
			f2g = &gfx_v7_kfd2kgd;
		break;
#endif
	case CHIP_TONGA:
		gfx_target_version = 80002;
		if (!vf)
			f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_FIJI:
	case CHIP_POLARIS10:
		gfx_target_version = 80003;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		gfx_target_version = 80003;
		if (!vf)
			f2g = &gfx_v8_kfd2kgd;
		break;
	default:
		switch (adev->ip_versions[GC_HWIP][0]) {
		/* Vega 10 */
		case IP_VERSION(9, 0, 1):
			gfx_target_version = 90000;
			f2g = &gfx_v9_kfd2kgd;
			break;
#ifdef KFD_SUPPORT_IOMMU_V2
		/* Raven */
		case IP_VERSION(9, 1, 0):
		case IP_VERSION(9, 2, 2):
			gfx_target_version = 90002;
			if (!vf)
				f2g = &gfx_v9_kfd2kgd;
			break;
#endif
		/* Vega12 */
		case IP_VERSION(9, 2, 1):
			gfx_target_version = 90004;
			if (!vf)
				f2g = &gfx_v9_kfd2kgd;
			break;
		/* Renoir */
		case IP_VERSION(9, 3, 0):
			gfx_target_version = 90012;
			if (!vf)
				f2g = &gfx_v9_kfd2kgd;
			break;
		/* Vega20 */
		case IP_VERSION(9, 4, 0):
			gfx_target_version = 90006;
			if (!vf)
				f2g = &gfx_v9_kfd2kgd;
			break;
		/* Arcturus */
		case IP_VERSION(9, 4, 1):
			gfx_target_version = 90008;
			f2g = &arcturus_kfd2kgd;
			break;
		/* Aldebaran */
		case IP_VERSION(9, 4, 2):
			gfx_target_version = 90010;
			f2g = &aldebaran_kfd2kgd;
			break;
		case IP_VERSION(9, 4, 3):
			gfx_target_version = 90400;
			f2g = &gc_9_4_3_kfd2kgd;
			break;
		/* Navi10 */
		case IP_VERSION(10, 1, 10):
			gfx_target_version = 100100;
			if (!vf)
				f2g = &gfx_v10_kfd2kgd;
			break;
		/* Navi12 */
		case IP_VERSION(10, 1, 2):
			gfx_target_version = 100101;
			f2g = &gfx_v10_kfd2kgd;
			break;
		/* Navi14 */
		case IP_VERSION(10, 1, 1):
			gfx_target_version = 100102;
			if (!vf)
				f2g = &gfx_v10_kfd2kgd;
			break;
		/* Cyan Skillfish */
		case IP_VERSION(10, 1, 3):
		case IP_VERSION(10, 1, 4):
			gfx_target_version = 100103;
			if (!vf)
				f2g = &gfx_v10_kfd2kgd;
			break;
		/* Sienna Cichlid */
		case IP_VERSION(10, 3, 0):
			gfx_target_version = 100300;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		/* Navy Flounder */
		case IP_VERSION(10, 3, 2):
			gfx_target_version = 100301;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		/* Van Gogh */
		case IP_VERSION(10, 3, 1):
			gfx_target_version = 100303;
			if (!vf)
				f2g = &gfx_v10_3_kfd2kgd;
			break;
		/* Dimgrey Cavefish */
		case IP_VERSION(10, 3, 4):
			gfx_target_version = 100302;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		/* Beige Goby */
		case IP_VERSION(10, 3, 5):
			gfx_target_version = 100304;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		/* Yellow Carp */
		case IP_VERSION(10, 3, 3):
			gfx_target_version = 100305;
			if (!vf)
				f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 6):
		case IP_VERSION(10, 3, 7):
			gfx_target_version = 100306;
			if (!vf)
				f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(11, 0, 0):
			gfx_target_version = 110000;
			f2g = &gfx_v11_kfd2kgd;
			break;
		case IP_VERSION(11, 0, 1):
		case IP_VERSION(11, 0, 4):
			gfx_target_version = 110003;
			f2g = &gfx_v11_kfd2kgd;
			break;
		case IP_VERSION(11, 0, 2):
			gfx_target_version = 110002;
			f2g = &gfx_v11_kfd2kgd;
			break;
		case IP_VERSION(11, 0, 3):
			/* Note: Compiler version is 11.0.1 while HW version is 11.0.3 */
			gfx_target_version = 110001;
			f2g = &gfx_v11_kfd2kgd;
			break;
		default:
			break;
		}
		break;
	}

	if (!f2g) {
		if (adev->ip_versions[GC_HWIP][0])
			dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n",
				adev->ip_versions[GC_HWIP][0], vf ? "VF" : "");
		else
			dev_err(kfd_device, "%s %s not supported in kfd\n",
				amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
		return NULL;
	}

	kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
	if (!kfd)
		return NULL;

	kfd->adev = adev;
	kfd_device_info_init(kfd, vf, gfx_target_version);
	kfd->init_complete = false;
	kfd->kfd2kgd = f2g;
	atomic_set(&kfd->compute_profile, 0);

	mutex_init(&kfd->doorbell_mutex);
	memset(&kfd->doorbell_available_index, 0,
		sizeof(kfd->doorbell_available_index));

	ida_init(&kfd->doorbell_ida);

	return kfd;
}

static void kfd_cwsr_init(struct kfd_dev *kfd)
{
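	/* Select the CWSR trap handler image matching the GC IP version;
	 * each image must fit in a single page, as the BUILD_BUG_ONs below
	 * enforce.
	 */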
	if (cwsr_enable && kfd->device_info.supports_cwsr) {
		if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx8_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_arcturus_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx9_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_nv1x_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx10_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
		} else {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx11_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
		}

		kfd->cwsr_enabled = true;
	}
}

static int kfd_gws_init(struct kfd_node *node)
{
	int ret = 0;
	struct kfd_dev *kfd = node->kfd;

	if (node->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
		return 0;

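	/* GWS is only usable when the MEC2 firmware for this GC version is
	 * recent enough; the per-IP minimum versions are checked below.
	 */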
	if (hws_gws_support || (KFD_IS_SOC15(node) &&
		((KFD_GC_VERSION(node) == IP_VERSION(9, 0, 1)
			&& kfd->mec2_fw_version >= 0x81b3) ||
		(KFD_GC_VERSION(node) <= IP_VERSION(9, 4, 0)
			&& kfd->mec2_fw_version >= 0x1b3)  ||
		(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 1)
			&& kfd->mec2_fw_version >= 0x30)   ||
		(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 2)
			&& kfd->mec2_fw_version >= 0x28) ||
		(KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
			&& KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
			&& kfd->mec2_fw_version >= 0x6b))))
		ret = amdgpu_amdkfd_alloc_gws(node->adev,
				node->adev->gds.gws_size, &node->gws);

	return ret;
}

static void kfd_smi_init(struct kfd_node *dev)
{
	INIT_LIST_HEAD(&dev->smi_clients);
	spin_lock_init(&dev->smi_lock);
}

static int kfd_init_node(struct kfd_node *node)
{
	int err = -1;

	if (kfd_interrupt_init(node)) {
		dev_err(kfd_device, "Error initializing interrupts\n");
		goto kfd_interrupt_error;
	}

	node->dqm = device_queue_manager_init(node);
	if (!node->dqm) {
		dev_err(kfd_device, "Error initializing queue manager\n");
		goto device_queue_manager_error;
	}

	if (kfd_gws_init(node)) {
		dev_err(kfd_device, "Could not allocate %d gws\n",
			node->adev->gds.gws_size);
		goto gws_error;
	}

	if (kfd_resume(node))
		goto kfd_resume_error;

	if (kfd_topology_add_device(node)) {
		dev_err(kfd_device, "Error adding device to topology\n");
		goto kfd_topology_add_device_error;
	}

	kfd_smi_init(node);

	return 0;

kfd_topology_add_device_error:
kfd_resume_error:
gws_error:
	device_queue_manager_uninit(node->dqm);
device_queue_manager_error:
	kfd_interrupt_exit(node);
kfd_interrupt_error:
	if (node->gws)
		amdgpu_amdkfd_free_gws(node->adev, node->gws);

	/* Cleanup the node memory here */
	kfree(node);
	return err;
}

static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
{
	struct kfd_node *knode;
	unsigned int i;

	for (i = 0; i < num_nodes; i++) {
		knode = kfd->nodes[i];
		device_queue_manager_uninit(knode->dqm);
		kfd_interrupt_exit(knode);
		kfd_topology_remove_device(knode);
		if (knode->gws)
			amdgpu_amdkfd_free_gws(knode->adev, knode->gws);
		kfree(knode);
		kfd->nodes[i] = NULL;
	}
}

bool kgd2kfd_device_init(struct kfd_dev *kfd,
			 const struct kgd2kfd_shared_resources *gpu_resources)
{
	unsigned int size, map_process_packet_size, i;
	struct kfd_node *node;
	uint32_t first_vmid_kfd, last_vmid_kfd, vmid_num_kfd;
	unsigned int max_proc_per_quantum;

	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
			KGD_ENGINE_MEC1);
	kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
			KGD_ENGINE_MEC2);
	kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
			KGD_ENGINE_SDMA1);
	kfd->shared_resources = *gpu_resources;

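	/* One KFD node is created per compute partition, e.g. 8 XCDs grouped
	 * two per partition yield four KFD nodes.
	 */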
	if (kfd->adev->gfx.num_xcd == 0 || kfd->adev->gfx.num_xcd == 1 ||
	    kfd->adev->gfx.num_xcc_per_xcp == 0)
		kfd->num_nodes = 1;
	else
		kfd->num_nodes =
			kfd->adev->gfx.num_xcd/kfd->adev->gfx.num_xcc_per_xcp;
	if (kfd->num_nodes == 0) {
		dev_err(kfd_device,
			"KFD num nodes cannot be 0, GC inst: %d, num_xcc_in_node: %d\n",
			kfd->adev->gfx.num_xcd, kfd->adev->gfx.num_xcc_per_xcp);
		goto out;
	}

	/* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
	 * 32 and 64-bit requests are possible and must be
	 * supported.
	 */
	kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);
	if (!kfd->pci_atomic_requested &&
	    kfd->device_info.needs_pci_atomics &&
	    (!kfd->device_info.no_atomic_fw_version ||
	     kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) {
		dev_info(kfd_device,
			 "skipped device %x:%x, PCI rejects atomics %d<%d\n",
			 kfd->adev->pdev->vendor, kfd->adev->pdev->device,
			 kfd->mec_fw_version,
			 kfd->device_info.no_atomic_fw_version);
		return false;
	}

	first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
	last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
	vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;

	/* For GFX9.4.3, we need special handling for VMIDs depending on
	 * partition mode.
	 * In CPX mode, the VMID range needs to be shared between XCDs.
	 * Additionally, there are 13 VMIDs (3-15) available for KFD. To
	 * divide them equally, we change starting VMID to 4 and not use
	 * VMID 3.
	 * If the VMID range changes for GFX9.4.3, then this code MUST be
	 * revisited.
	 */
	if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
	    kfd->adev->gfx.partition_mode == AMDGPU_CPX_PARTITION_MODE &&
	    kfd->num_nodes != 1) {
		vmid_num_kfd /= 2;
		first_vmid_kfd = last_vmid_kfd + 1 - vmid_num_kfd*2;
	}
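	/* e.g. with VMIDs 3-15: vmid_num_kfd becomes 13/2 = 6 and
	 * first_vmid_kfd becomes 15 + 1 - 12 = 4, so the halves handed out
	 * per node below are 4-9 and 10-15.
	 */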

	/* Verify module parameter regarding the number of mapped processes */
	if (hws_max_conc_proc >= 0)
		max_proc_per_quantum = min((u32)hws_max_conc_proc, vmid_num_kfd);
	else
		max_proc_per_quantum = vmid_num_kfd;

	/* calculate max size of mqds needed for queues */
	size = max_num_of_queues_per_device *
			kfd->device_info.mqd_size_aligned;

	/*
	 * calculate max size of runlist packet.
	 * There can be only 2 packets at once
	 */
	map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ?
				sizeof(struct pm4_mes_map_process_aldebaran) :
				sizeof(struct pm4_mes_map_process);
	size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
		max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
		+ sizeof(struct pm4_mes_runlist)) * 2;

	/* Add size of HIQ & DIQ */
	size += KFD_KERNEL_QUEUE_SIZE * 2;

	/* add another 512KB for all other allocations on gart (HPD, fences) */
	size += 512 * 1024;

	if (amdgpu_amdkfd_alloc_gtt_mem(
			kfd->adev, size, &kfd->gtt_mem,
			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
			false)) {
		dev_err(kfd_device, "Could not allocate %d bytes\n", size);
		goto alloc_gtt_mem_failure;
	}

	dev_info(kfd_device, "Allocated %d bytes on gart\n", size);

	/* Initialize GTT sa with 512 byte chunk size */
	if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
		dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
		goto kfd_gtt_sa_init_error;
	}

	if (kfd_doorbell_init(kfd)) {
		dev_err(kfd_device,
			"Error initializing doorbell aperture\n");
		goto kfd_doorbell_error;
	}

	if (amdgpu_use_xgmi_p2p)
		kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;

	/*
	 * For GFX9.4.3, the KFD abstracts all partitions within a socket as
	 * xGMI connected in the topology so assign a unique hive id per
	 * device based on the pci device location if device is in PCIe mode.
	 */
	if (!kfd->hive_id && (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) && kfd->num_nodes > 1)
		kfd->hive_id = pci_dev_id(kfd->adev->pdev);

	kfd->noretry = kfd->adev->gmc.noretry;

	/* If CRAT is broken, won't set iommu enabled */
	kfd_double_confirm_iommu_support(kfd);

	if (kfd_iommu_device_init(kfd)) {
		kfd->use_iommu_v2 = false;
		dev_err(kfd_device, "Error initializing iommuv2\n");
		goto device_iommu_error;
	}

	kfd_cwsr_init(kfd);

	/* TODO: Needs to be updated for memory partitioning */
	svm_migrate_init(kfd->adev);

	dev_info(kfd_device, "Total number of KFD nodes to be created: %d\n",
				kfd->num_nodes);
	for (i = 0; i < kfd->num_nodes; i++) {
		/* Allocate and initialize the KFD node */
		node = kzalloc(sizeof(struct kfd_node), GFP_KERNEL);
		if (!node)
			goto node_alloc_error;

		node->node_id = i;
		node->adev = kfd->adev;
		node->kfd = kfd;
		node->kfd2kgd = kfd->kfd2kgd;
		node->vm_info.vmid_num_kfd = vmid_num_kfd;
		node->num_xcc_per_node = max(1U, kfd->adev->gfx.num_xcc_per_xcp);
		node->start_xcc_id = node->num_xcc_per_node * i;

		if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
		    kfd->adev->gfx.partition_mode == AMDGPU_CPX_PARTITION_MODE &&
		    kfd->num_nodes != 1) {
			/* For GFX9.4.3 and CPX mode, first XCD gets VMID range
			 * 4-9 and second XCD gets VMID range 10-15.
			 */

			node->vm_info.first_vmid_kfd = (i%2 == 0) ?
						first_vmid_kfd :
						first_vmid_kfd+vmid_num_kfd;
			node->vm_info.last_vmid_kfd = (i%2 == 0) ?
						last_vmid_kfd-vmid_num_kfd :
						last_vmid_kfd;
			node->compute_vmid_bitmap =
				((0x1 << (node->vm_info.last_vmid_kfd + 1)) - 1) -
				((0x1 << (node->vm_info.first_vmid_kfd)) - 1);
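			/* e.g. VMIDs 4-9 give ((1 << 10) - 1) - ((1 << 4) - 1)
			 * = 0x3f0, i.e. bits 4 through 9 set.
			 */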
		} else {
			node->vm_info.first_vmid_kfd = first_vmid_kfd;
			node->vm_info.last_vmid_kfd = last_vmid_kfd;
			node->compute_vmid_bitmap =
				gpu_resources->compute_vmid_bitmap;
		}
		node->max_proc_per_quantum = max_proc_per_quantum;
		atomic_set(&node->sram_ecc_flag, 0);
		/* Initialize the KFD node */
		if (kfd_init_node(node)) {
			dev_err(kfd_device, "Error initializing KFD node\n");
			goto node_init_error;
		}
		kfd->nodes[i] = node;
	}

	if (kfd_resume_iommu(kfd))
		goto kfd_resume_iommu_error;

	amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info);

	kfd->init_complete = true;
	dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor,
		 kfd->adev->pdev->device);

	pr_debug("Starting kfd with the following scheduling policy %d\n",
		node->dqm->sched_policy);

	goto out;

kfd_resume_iommu_error:
node_init_error:
node_alloc_error:
	kfd_cleanup_nodes(kfd, i);
device_iommu_error:
	kfd_doorbell_fini(kfd);
kfd_doorbell_error:
	kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
	amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
alloc_gtt_mem_failure:
	dev_err(kfd_device,
		"device %x:%x NOT added due to errors\n",
		kfd->adev->pdev->vendor, kfd->adev->pdev->device);
out:
	return kfd->init_complete;
}

void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
	if (kfd->init_complete) {
		/* Cleanup KFD nodes */
		kfd_cleanup_nodes(kfd, kfd->num_nodes);
		/* Cleanup common/shared resources */
		kfd_doorbell_fini(kfd);
		ida_destroy(&kfd->doorbell_ida);
		kfd_gtt_sa_fini(kfd);
		amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
	}

	kfree(kfd);
}

int kgd2kfd_pre_reset(struct kfd_dev *kfd)
{
	struct kfd_node *node;
	int i;

	if (!kfd->init_complete)
		return 0;

	for (i = 0; i < kfd->num_nodes; i++) {
		node = kfd->nodes[i];
		kfd_smi_event_update_gpu_reset(node, false);
		node->dqm->ops.pre_reset(node->dqm);
	}

	kgd2kfd_suspend(kfd, false);

	for (i = 0; i < kfd->num_nodes; i++)
		kfd_signal_reset_event(kfd->nodes[i]);

	return 0;
}

/*
 * FIXME: KFD is currently unable to resume existing processes after a
 * reset. All existing processes are kept in an evicted state until they
 * terminate.
 */

int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
	int ret;
	struct kfd_node *node;
	int i;

	if (!kfd->init_complete)
		return 0;

	for (i = 0; i < kfd->num_nodes; i++) {
		ret = kfd_resume(kfd->nodes[i]);
		if (ret)
			return ret;
	}

	atomic_dec(&kfd_locked);

	for (i = 0; i < kfd->num_nodes; i++) {
		node = kfd->nodes[i];
		atomic_set(&node->sram_ecc_flag, 0);
		kfd_smi_event_update_gpu_reset(node, true);
	}

	return 0;
}

bool kfd_is_locked(void)
{
	return atomic_read(&kfd_locked) > 0;
}

void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
	struct kfd_node *node;
	int i;

	if (!kfd->init_complete)
		return;

	/* for runtime suspend, skip locking kfd */
	if (!run_pm) {
		/* For first KFD device suspend all the KFD processes */
		if (atomic_inc_return(&kfd_locked) == 1)
			kfd_suspend_all_processes();
	}

	for (i = 0; i < kfd->num_nodes; i++) {
		node = kfd->nodes[i];
		node->dqm->ops.stop(node->dqm);
	}
	kfd_iommu_suspend(kfd);
}

int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
	int ret, count, i;

	if (!kfd->init_complete)
		return 0;

	for (i = 0; i < kfd->num_nodes; i++) {
		ret = kfd_resume(kfd->nodes[i]);
		if (ret)
			return ret;
	}

	/* for runtime resume, skip unlocking kfd */
	if (!run_pm) {
		count = atomic_dec_return(&kfd_locked);
		WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
		if (count == 0)
			ret = kfd_resume_all_processes();
	}

	return ret;
}

int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
{
	if (!kfd->init_complete)
		return 0;

	return kfd_resume_iommu(kfd);
}

static int kfd_resume_iommu(struct kfd_dev *kfd)
{
	int err = 0;

	err = kfd_iommu_resume(kfd);
	if (err)
		dev_err(kfd_device,
			"Failed to resume IOMMU for device %x:%x\n",
			kfd->adev->pdev->vendor, kfd->adev->pdev->device);
	return err;
}

static int kfd_resume(struct kfd_node *node)
{
	int err = 0;

	err = node->dqm->ops.start(node->dqm);
	if (err)
		dev_err(kfd_device,
			"Error starting queue manager for device %x:%x\n",
			node->adev->pdev->vendor, node->adev->pdev->device);

	return err;
}

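/*
 * Queue work on the next online CPU in the caller's NUMA node; the search
 * may wrap around back to the current CPU.
 */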
static inline void kfd_queue_work(struct workqueue_struct *wq,
				  struct work_struct *work)
{
	int cpu, new_cpu;

	cpu = new_cpu = smp_processor_id();
	do {
		new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
		if (cpu_to_node(new_cpu) == numa_node_id())
			break;
	} while (cpu != new_cpu);

	queue_work_on(new_cpu, wq, work);
}

/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
	uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE], i;
	bool is_patched = false;
	unsigned long flags;
	struct kfd_node *node;

	if (!kfd->init_complete)
		return;

	if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) {
		dev_err_once(kfd_device, "Ring entry too small\n");
		return;
	}

	for (i = 0; i < kfd->num_nodes; i++) {
		node = kfd->nodes[i];
		spin_lock_irqsave(&node->interrupt_lock, flags);

		if (node->interrupts_active
		    && interrupt_is_wanted(node, ih_ring_entry,
					   patched_ihre, &is_patched)
		    && enqueue_ih_ring_entry(node,
					     is_patched ? patched_ihre : ih_ring_entry)) {
			kfd_queue_work(node->ih_wq, &node->interrupt_work);
			spin_unlock_irqrestore(&node->interrupt_lock, flags);
			return;
		}
		spin_unlock_irqrestore(&node->interrupt_lock, flags);
	}
}
10266e81090bSOded Gabbay 
1027c7f21978SPhilip Yang int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger)
10286b95e797SFelix Kuehling {
10296b95e797SFelix Kuehling 	struct kfd_process *p;
10306b95e797SFelix Kuehling 	int r;
10316b95e797SFelix Kuehling 
10326b95e797SFelix Kuehling 	/* Because we are called from arbitrary context (workqueue) as opposed
10336b95e797SFelix Kuehling 	 * to process context, kfd_process could attempt to exit while we are
10346b95e797SFelix Kuehling 	 * running so the lookup function increments the process ref count.
10356b95e797SFelix Kuehling 	 */
10366b95e797SFelix Kuehling 	p = kfd_lookup_process_by_mm(mm);
10376b95e797SFelix Kuehling 	if (!p)
10386b95e797SFelix Kuehling 		return -ESRCH;
10396b95e797SFelix Kuehling 
1040b2057956SFelix Kuehling 	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
1041c7f21978SPhilip Yang 	r = kfd_process_evict_queues(p, trigger);
10426b95e797SFelix Kuehling 
10436b95e797SFelix Kuehling 	kfd_unref_process(p);
10446b95e797SFelix Kuehling 	return r;
10456b95e797SFelix Kuehling }
10466b95e797SFelix Kuehling 
10476b95e797SFelix Kuehling int kgd2kfd_resume_mm(struct mm_struct *mm)
10486b95e797SFelix Kuehling {
10496b95e797SFelix Kuehling 	struct kfd_process *p;
10506b95e797SFelix Kuehling 	int r;
10516b95e797SFelix Kuehling 
10526b95e797SFelix Kuehling 	/* Because we are called from arbitrary context (workqueue) as opposed
10536b95e797SFelix Kuehling 	 * to process context, kfd_process could attempt to exit while we are
10546b95e797SFelix Kuehling 	 * running so the lookup function increments the process ref count.
10556b95e797SFelix Kuehling 	 */
10566b95e797SFelix Kuehling 	p = kfd_lookup_process_by_mm(mm);
10576b95e797SFelix Kuehling 	if (!p)
10586b95e797SFelix Kuehling 		return -ESRCH;
10596b95e797SFelix Kuehling 
10606b95e797SFelix Kuehling 	r = kfd_process_restore_queues(p);
10616b95e797SFelix Kuehling 
10626b95e797SFelix Kuehling 	kfd_unref_process(p);
10636b95e797SFelix Kuehling 	return r;
10646b95e797SFelix Kuehling }
10656b95e797SFelix Kuehling 
106626103436SFelix Kuehling /** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
106726103436SFelix Kuehling  *   prepare for safe eviction of KFD BOs that belong to the specified
106826103436SFelix Kuehling  *   process.
106926103436SFelix Kuehling  *
107026103436SFelix Kuehling  * @mm: mm_struct that identifies the specified KFD process
107126103436SFelix Kuehling  * @fence: eviction fence attached to KFD process BOs
107226103436SFelix Kuehling  *
 * Return: 0 if the eviction work was scheduled or the fence had already
 * signaled, -EINVAL if no fence was given, -ENODEV if no KFD process was
 * found for @mm.
107326103436SFelix Kuehling  */
107426103436SFelix Kuehling int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
107526103436SFelix Kuehling 					       struct dma_fence *fence)
107626103436SFelix Kuehling {
107726103436SFelix Kuehling 	struct kfd_process *p;
107826103436SFelix Kuehling 	unsigned long active_time;
107926103436SFelix Kuehling 	unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);
108026103436SFelix Kuehling 
108126103436SFelix Kuehling 	if (!fence)
108226103436SFelix Kuehling 		return -EINVAL;
108326103436SFelix Kuehling 
108426103436SFelix Kuehling 	if (dma_fence_is_signaled(fence))
108526103436SFelix Kuehling 		return 0;
108626103436SFelix Kuehling 
108726103436SFelix Kuehling 	p = kfd_lookup_process_by_mm(mm);
108826103436SFelix Kuehling 	if (!p)
108926103436SFelix Kuehling 		return -ENODEV;
109026103436SFelix Kuehling 
109126103436SFelix Kuehling 	if (fence->seqno == p->last_eviction_seqno)
109226103436SFelix Kuehling 		goto out;
109326103436SFelix Kuehling 
109426103436SFelix Kuehling 	p->last_eviction_seqno = fence->seqno;
109526103436SFelix Kuehling 
109626103436SFelix Kuehling 	/* Avoid KFD process starvation. Wait for at least
109726103436SFelix Kuehling 	 * PROCESS_ACTIVE_TIME_MS before evicting the process again
109826103436SFelix Kuehling 	 */
109926103436SFelix Kuehling 	active_time = get_jiffies_64() - p->last_restore_timestamp;
110026103436SFelix Kuehling 	if (delay_jiffies > active_time)
110126103436SFelix Kuehling 		delay_jiffies -= active_time;
110226103436SFelix Kuehling 	else
110326103436SFelix Kuehling 		delay_jiffies = 0;
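
	/* Worked example (numbers illustrative): if PROCESS_ACTIVE_TIME_MS
	 * corresponds to 10 ms and the process was restored 3 ms ago,
	 * active_time covers ~3 ms worth of jiffies and the eviction is
	 * delayed by the remaining ~7 ms; a process already active for
	 * 10 ms or longer is scheduled immediately (delay_jiffies = 0).
	 */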
110426103436SFelix Kuehling 
110526103436SFelix Kuehling 	/* During process initialization, eviction_work.dwork is initialized
110626103436SFelix Kuehling 	 * to kfd_evict_bo_worker.
110726103436SFelix Kuehling 	 */
1108b2057956SFelix Kuehling 	WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
1109b2057956SFelix Kuehling 	     p->lead_thread->pid, delay_jiffies);
111026103436SFelix Kuehling 	schedule_delayed_work(&p->eviction_work, delay_jiffies);
111126103436SFelix Kuehling out:
111226103436SFelix Kuehling 	kfd_unref_process(p);
111326103436SFelix Kuehling 	return 0;
111426103436SFelix Kuehling }
111526103436SFelix Kuehling 
11166e81090bSOded Gabbay static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
11176e81090bSOded Gabbay 				unsigned int chunk_size)
11186e81090bSOded Gabbay {
111932fa8219SFelix Kuehling 	if (WARN_ON(buf_size < chunk_size))
112032fa8219SFelix Kuehling 		return -EINVAL;
112132fa8219SFelix Kuehling 	if (WARN_ON(buf_size == 0))
112232fa8219SFelix Kuehling 		return -EINVAL;
112332fa8219SFelix Kuehling 	if (WARN_ON(chunk_size == 0))
112432fa8219SFelix Kuehling 		return -EINVAL;
11256e81090bSOded Gabbay 
11266e81090bSOded Gabbay 	kfd->gtt_sa_chunk_size = chunk_size;
11276e81090bSOded Gabbay 	kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
11286e81090bSOded Gabbay 
1129f43a9f18SChristophe JAILLET 	kfd->gtt_sa_bitmap = bitmap_zalloc(kfd->gtt_sa_num_of_chunks,
1130f43a9f18SChristophe JAILLET 					   GFP_KERNEL);
11316e81090bSOded Gabbay 	if (!kfd->gtt_sa_bitmap)
11326e81090bSOded Gabbay 		return -ENOMEM;
11336e81090bSOded Gabbay 
113479775b62SKent Russell 	pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
11356e81090bSOded Gabbay 			kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);
11366e81090bSOded Gabbay 
11376e81090bSOded Gabbay 	mutex_init(&kfd->gtt_sa_lock);
11386e81090bSOded Gabbay 
11396e81090bSOded Gabbay 	return 0;
11406e81090bSOded Gabbay }
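
/* Usage sketch (values illustrative): carving a 512 KiB GTT buffer into
 * 512-byte chunks yields gtt_sa_num_of_chunks = 1024, with one bit in
 * gtt_sa_bitmap tracking each chunk:
 *
 *	if (kfd_gtt_sa_init(kfd, 512 * 1024, 512))
 *		dev_err(kfd_device, "GTT sub-allocator init failed\n");
 */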
11416e81090bSOded Gabbay 
11426e81090bSOded Gabbay static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
11436e81090bSOded Gabbay {
11446e81090bSOded Gabbay 	mutex_destroy(&kfd->gtt_sa_lock);
1145f43a9f18SChristophe JAILLET 	bitmap_free(kfd->gtt_sa_bitmap);
11466e81090bSOded Gabbay }
11476e81090bSOded Gabbay 
11486e81090bSOded Gabbay static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
11496e81090bSOded Gabbay 						unsigned int bit_num,
11506e81090bSOded Gabbay 						unsigned int chunk_size)
11516e81090bSOded Gabbay {
11526e81090bSOded Gabbay 	return start_addr + bit_num * chunk_size;
11536e81090bSOded Gabbay }
11546e81090bSOded Gabbay 
11556e81090bSOded Gabbay static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
11566e81090bSOded Gabbay 						unsigned int bit_num,
11576e81090bSOded Gabbay 						unsigned int chunk_size)
11586e81090bSOded Gabbay {
11596e81090bSOded Gabbay 	return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size);
11606e81090bSOded Gabbay }
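
/* E.g. with a 512-byte chunk size (illustrative), bit 3 of the bitmap maps
 * to start_addr + 3 * 512 = start_addr + 1536 in both address spaces.
 */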
11616e81090bSOded Gabbay 
11628dc1db31SMukul Joshi int kfd_gtt_sa_allocate(struct kfd_node *node, unsigned int size,
11636e81090bSOded Gabbay 			struct kfd_mem_obj **mem_obj)
11646e81090bSOded Gabbay {
11656e81090bSOded Gabbay 	unsigned int found, start_search, cur_size;
11668dc1db31SMukul Joshi 	struct kfd_dev *kfd = node->kfd;
11676e81090bSOded Gabbay 
11686e81090bSOded Gabbay 	if (size == 0)
11696e81090bSOded Gabbay 		return -EINVAL;
11706e81090bSOded Gabbay 
11716e81090bSOded Gabbay 	if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
11726e81090bSOded Gabbay 		return -ENOMEM;
11736e81090bSOded Gabbay 
11741cd106ecSFelix Kuehling 	*mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
11751cd106ecSFelix Kuehling 	if (!(*mem_obj))
11766e81090bSOded Gabbay 		return -ENOMEM;
11776e81090bSOded Gabbay 
117879775b62SKent Russell 	pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);
11796e81090bSOded Gabbay 
11806e81090bSOded Gabbay 	start_search = 0;
11816e81090bSOded Gabbay 
11826e81090bSOded Gabbay 	mutex_lock(&kfd->gtt_sa_lock);
11836e81090bSOded Gabbay 
11846e81090bSOded Gabbay kfd_gtt_restart_search:
11856e81090bSOded Gabbay 	/* Find the first chunk that is free */
11866e81090bSOded Gabbay 	found = find_next_zero_bit(kfd->gtt_sa_bitmap,
11876e81090bSOded Gabbay 					kfd->gtt_sa_num_of_chunks,
11886e81090bSOded Gabbay 					start_search);
11896e81090bSOded Gabbay 
119079775b62SKent Russell 	pr_debug("Found = %d\n", found);
11916e81090bSOded Gabbay 
11926e81090bSOded Gabbay 	/* If there wasn't any free chunk, bail out */
11936e81090bSOded Gabbay 	if (found == kfd->gtt_sa_num_of_chunks)
11946e81090bSOded Gabbay 		goto kfd_gtt_no_free_chunk;
11956e81090bSOded Gabbay 
11966e81090bSOded Gabbay 	/* Update fields of mem_obj */
11976e81090bSOded Gabbay 	(*mem_obj)->range_start = found;
11986e81090bSOded Gabbay 	(*mem_obj)->range_end = found;
11996e81090bSOded Gabbay 	(*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
12006e81090bSOded Gabbay 					kfd->gtt_start_gpu_addr,
12016e81090bSOded Gabbay 					found,
12026e81090bSOded Gabbay 					kfd->gtt_sa_chunk_size);
12036e81090bSOded Gabbay 	(*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
12046e81090bSOded Gabbay 					kfd->gtt_start_cpu_ptr,
12056e81090bSOded Gabbay 					found,
12066e81090bSOded Gabbay 					kfd->gtt_sa_chunk_size);
12076e81090bSOded Gabbay 
120879775b62SKent Russell 	pr_debug("gpu_addr = %p, cpu_addr = %p\n",
12096e81090bSOded Gabbay 			(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);
12106e81090bSOded Gabbay 
12116e81090bSOded Gabbay 	/* If we need only one chunk, mark it as allocated and get out */
12126e81090bSOded Gabbay 	if (size <= kfd->gtt_sa_chunk_size) {
121379775b62SKent Russell 		pr_debug("Single bit\n");
1214b8b9ba58SChristophe JAILLET 		__set_bit(found, kfd->gtt_sa_bitmap);
12156e81090bSOded Gabbay 		goto kfd_gtt_out;
12166e81090bSOded Gabbay 	}
12176e81090bSOded Gabbay 
12186e81090bSOded Gabbay 	/* Otherwise, try to see if we have enough contiguous chunks */
12196e81090bSOded Gabbay 	cur_size = size - kfd->gtt_sa_chunk_size;
12206e81090bSOded Gabbay 	do {
12216e81090bSOded Gabbay 		(*mem_obj)->range_end =
12226e81090bSOded Gabbay 			find_next_zero_bit(kfd->gtt_sa_bitmap,
12236e81090bSOded Gabbay 					kfd->gtt_sa_num_of_chunks, ++found);
12246e81090bSOded Gabbay 		/*
12256e81090bSOded Gabbay 		 * If the next free chunk is not contiguous, then we need to
12266e81090bSOded Gabbay 		 * restart our search from the last free chunk we found (which
12276e81090bSOded Gabbay 		 * wasn't contiguous to the previous ones).
12286e81090bSOded Gabbay 		 */
12296e81090bSOded Gabbay 		if ((*mem_obj)->range_end != found) {
12306e81090bSOded Gabbay 			start_search = found;
12316e81090bSOded Gabbay 			goto kfd_gtt_restart_search;
12326e81090bSOded Gabbay 		}
12336e81090bSOded Gabbay 
12346e81090bSOded Gabbay 		/*
12356e81090bSOded Gabbay 		 * If we reached end of buffer, bail out with error
12366e81090bSOded Gabbay 		 */
12376e81090bSOded Gabbay 		if (found == kfd->gtt_sa_num_of_chunks)
12386e81090bSOded Gabbay 			goto kfd_gtt_no_free_chunk;
12396e81090bSOded Gabbay 
12406e81090bSOded Gabbay 		/* Check if we don't need another chunk */
12416e81090bSOded Gabbay 		if (cur_size <= kfd->gtt_sa_chunk_size)
12426e81090bSOded Gabbay 			cur_size = 0;
12436e81090bSOded Gabbay 		else
12446e81090bSOded Gabbay 			cur_size -= kfd->gtt_sa_chunk_size;
12456e81090bSOded Gabbay 
12466e81090bSOded Gabbay 	} while (cur_size > 0);
12476e81090bSOded Gabbay 
124879775b62SKent Russell 	pr_debug("range_start = %d, range_end = %d\n",
12496e81090bSOded Gabbay 		(*mem_obj)->range_start, (*mem_obj)->range_end);
12506e81090bSOded Gabbay 
12516e81090bSOded Gabbay 	/* Mark the chunks as allocated */
1252b8b9ba58SChristophe JAILLET 	bitmap_set(kfd->gtt_sa_bitmap, (*mem_obj)->range_start,
1253b8b9ba58SChristophe JAILLET 		   (*mem_obj)->range_end - (*mem_obj)->range_start + 1);
12546e81090bSOded Gabbay 
12556e81090bSOded Gabbay kfd_gtt_out:
12566e81090bSOded Gabbay 	mutex_unlock(&kfd->gtt_sa_lock);
12576e81090bSOded Gabbay 	return 0;
12586e81090bSOded Gabbay 
12596e81090bSOded Gabbay kfd_gtt_no_free_chunk:
12603148a6a0SJack Zhang 	pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj);
12616e81090bSOded Gabbay 	mutex_unlock(&kfd->gtt_sa_lock);
12623148a6a0SJack Zhang 	kfree(*mem_obj);
12636e81090bSOded Gabbay 	return -ENOMEM;
12646e81090bSOded Gabbay }
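
/* Usage sketch (hypothetical caller): sub-allocate GTT memory for an MQD
 * and release it with kfd_gtt_sa_free() when done:
 *
 *	struct kfd_mem_obj *mqd_mem;
 *
 *	if (kfd_gtt_sa_allocate(node, MQD_SIZE_ALIGNED, &mqd_mem))
 *		return -ENOMEM;
 *	// ... fill the MQD through mqd_mem->cpu_ptr and hand
 *	// mqd_mem->gpu_addr to the hardware ...
 *	kfd_gtt_sa_free(node, mqd_mem);
 */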
12656e81090bSOded Gabbay 
12668dc1db31SMukul Joshi int kfd_gtt_sa_free(struct kfd_node *node, struct kfd_mem_obj *mem_obj)
12676e81090bSOded Gabbay {
12688dc1db31SMukul Joshi 	struct kfd_dev *kfd = node->kfd;
12698dc1db31SMukul Joshi 
12709216ed29SOded Gabbay 	/* Act like kfree when trying to free a NULL object */
12719216ed29SOded Gabbay 	if (!mem_obj)
12729216ed29SOded Gabbay 		return 0;
12736e81090bSOded Gabbay 
127479775b62SKent Russell 	pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n",
12756e81090bSOded Gabbay 			mem_obj, mem_obj->range_start, mem_obj->range_end);
12766e81090bSOded Gabbay 
12776e81090bSOded Gabbay 	mutex_lock(&kfd->gtt_sa_lock);
12786e81090bSOded Gabbay 
12796e81090bSOded Gabbay 	/* Mark the chunks as free */
1280b8b9ba58SChristophe JAILLET 	bitmap_clear(kfd->gtt_sa_bitmap, mem_obj->range_start,
1281b8b9ba58SChristophe JAILLET 		     mem_obj->range_end - mem_obj->range_start + 1);
12826e81090bSOded Gabbay 
12836e81090bSOded Gabbay 	mutex_unlock(&kfd->gtt_sa_lock);
12846e81090bSOded Gabbay 
12856e81090bSOded Gabbay 	kfree(mem_obj);
12866e81090bSOded Gabbay 	return 0;
12876e81090bSOded Gabbay }
1288a29ec470SShaoyun Liu 
12899b54d201SEric Huang void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
12909b54d201SEric Huang {
129174c5b85dSMukul Joshi 	/*
129274c5b85dSMukul Joshi 	 * TODO: Currently update SRAM ECC flag for first node.
129374c5b85dSMukul Joshi 	 * This needs to be updated later when we can
129474c5b85dSMukul Joshi 	 * identify SRAM ECC error on other nodes also.
129574c5b85dSMukul Joshi 	 */
12969b54d201SEric Huang 	if (kfd)
129774c5b85dSMukul Joshi 		atomic_inc(&kfd->nodes[0]->sram_ecc_flag);
12989b54d201SEric Huang }
12999b54d201SEric Huang 
13008dc1db31SMukul Joshi void kfd_inc_compute_active(struct kfd_node *node)
130143d8107fSHarish Kasiviswanathan {
13028dc1db31SMukul Joshi 	if (atomic_inc_return(&node->kfd->compute_profile) == 1)
13038dc1db31SMukul Joshi 		amdgpu_amdkfd_set_compute_idle(node->adev, false);
130443d8107fSHarish Kasiviswanathan }
130543d8107fSHarish Kasiviswanathan 
13068dc1db31SMukul Joshi void kfd_dec_compute_active(struct kfd_node *node)
130743d8107fSHarish Kasiviswanathan {
13088dc1db31SMukul Joshi 	int count = atomic_dec_return(&node->kfd->compute_profile);
130943d8107fSHarish Kasiviswanathan 
131043d8107fSHarish Kasiviswanathan 	if (count == 0)
13118dc1db31SMukul Joshi 		amdgpu_amdkfd_set_compute_idle(node->adev, true);
131243d8107fSHarish Kasiviswanathan 	WARN_ONCE(count < 0, "Compute profile ref. count error");
131343d8107fSHarish Kasiviswanathan }
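
/* Typical pairing (illustrative): the first kfd_inc_compute_active() call
 * takes the device out of the idle power profile and the matching
 * kfd_dec_compute_active() from the last user restores it; callers must
 * keep the two strictly balanced or the WARN_ONCE above fires.
 */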
131443d8107fSHarish Kasiviswanathan 
1315410e302eSGraham Sider void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
13162c2b0d88SMukul Joshi {
131774c5b85dSMukul Joshi 	/*
131874c5b85dSMukul Joshi 	 * TODO: For now, raise the throttling event only on first node.
131974c5b85dSMukul Joshi 	 * This will need to change after we are able to determine
132074c5b85dSMukul Joshi 	 * which node raised the throttling event.
132174c5b85dSMukul Joshi 	 */
1322158fc08dSAmber Lin 	if (kfd && kfd->init_complete)
132374c5b85dSMukul Joshi 		kfd_smi_event_update_thermal_throttling(kfd->nodes[0],
132474c5b85dSMukul Joshi 							throttle_bitmask);
13252c2b0d88SMukul Joshi }
13262c2b0d88SMukul Joshi 
1327ee2f17f4SAmber Lin /* kfd_get_num_sdma_engines returns the number of PCIe-optimized SDMA engines
1328ee2f17f4SAmber Lin  * and kfd_get_num_xgmi_sdma_engines returns the number of XGMI SDMA engines.
1329ee2f17f4SAmber Lin  * When the device has more than two engines, we reserve two for PCIe to enable
1330ee2f17f4SAmber Lin  * full-duplex and the rest are used as XGMI.
1331ee2f17f4SAmber Lin  */
13328dc1db31SMukul Joshi unsigned int kfd_get_num_sdma_engines(struct kfd_node *node)
1333ee2f17f4SAmber Lin {
1334ee2f17f4SAmber Lin 	/* If XGMI is not supported, all SDMA engines are PCIe */
13358dc1db31SMukul Joshi 	if (!node->adev->gmc.xgmi.supported)
1336a805889aSMukul Joshi 		return node->adev->sdma.num_instances / (int)node->kfd->num_nodes;
1337ee2f17f4SAmber Lin 
1338a805889aSMukul Joshi 	return min(node->adev->sdma.num_instances / (int)node->kfd->num_nodes, 2);
1339ee2f17f4SAmber Lin }
1340ee2f17f4SAmber Lin 
13418dc1db31SMukul Joshi unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *node)
1342ee2f17f4SAmber Lin {
1343ee2f17f4SAmber Lin 	/* After reserved for PCIe, the rest of engines are XGMI */
1344a805889aSMukul Joshi 	return node->adev->sdma.num_instances / (int)node->kfd->num_nodes -
1345a805889aSMukul Joshi 		kfd_get_num_sdma_engines(node);
1346ee2f17f4SAmber Lin }
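
/* Worked example (illustrative): a single-node device with 8 SDMA
 * instances and XGMI support gets min(8 / 1, 2) = 2 PCIe-optimized
 * engines and 8 / 1 - 2 = 6 XGMI engines; without XGMI support all
 * 8 engines are PCIe.
 */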
1347ee2f17f4SAmber Lin 
1348a29ec470SShaoyun Liu #if defined(CONFIG_DEBUG_FS)
1349a29ec470SShaoyun Liu 
1350a29ec470SShaoyun Liu /* This function sends a packet to the HIQ to hang the HWS,
1351a29ec470SShaoyun Liu  * which will trigger a GPU reset and bring the HWS back to a normal state.
1352a29ec470SShaoyun Liu  */
13538dc1db31SMukul Joshi int kfd_debugfs_hang_hws(struct kfd_node *dev)
1354a29ec470SShaoyun Liu {
1355a29ec470SShaoyun Liu 	if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
1356a29ec470SShaoyun Liu 		pr_err("HWS is not enabled\n");
1357a29ec470SShaoyun Liu 		return -EINVAL;
1358a29ec470SShaoyun Liu 	}
1359a29ec470SShaoyun Liu 
13604f942aaeSOak Zeng 	return dqm_debugfs_hang_hws(dev->dqm);
1361a29ec470SShaoyun Liu }
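
/* This is normally exercised from user space through the kfd debugfs
 * interface (path and write format assumed here, see kfd_debugfs.c):
 *
 *	echo <gpu_id> > /sys/kernel/debug/kfd/hang_hws
 */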
1362a29ec470SShaoyun Liu 
1363a29ec470SShaoyun Liu #endif
1364