1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2014-2022 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include <linux/ratelimit.h>
26 #include <linux/printk.h>
27 #include <linux/slab.h>
28 #include <linux/list.h>
29 #include <linux/types.h>
30 #include <linux/bitops.h>
31 #include <linux/sched.h>
32 #include "kfd_priv.h"
33 #include "kfd_device_queue_manager.h"
34 #include "kfd_mqd_manager.h"
35 #include "cik_regs.h"
36 #include "kfd_kernel_queue.h"
37 #include "amdgpu_amdkfd.h"
38 #include "mes_api_def.h"
39 
40 /* Size of the per-pipe EOP queue */
41 #define CIK_HPD_EOP_BYTES_LOG2 11
42 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
43 
44 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
45 				  u32 pasid, unsigned int vmid);
46 
47 static int execute_queues_cpsch(struct device_queue_manager *dqm,
48 				enum kfd_unmap_queues_filter filter,
49 				uint32_t filter_param);
50 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
51 				enum kfd_unmap_queues_filter filter,
52 				uint32_t filter_param, bool reset);
53 
54 static int map_queues_cpsch(struct device_queue_manager *dqm);
55 
56 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
57 				struct queue *q);
58 
59 static inline void deallocate_hqd(struct device_queue_manager *dqm,
60 				struct queue *q);
61 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
62 static int allocate_sdma_queue(struct device_queue_manager *dqm,
63 				struct queue *q, const uint32_t *restore_sdma_id);
64 static void kfd_process_hw_exception(struct work_struct *work);
65 
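/* Map a KFD queue type to the MQD type that manages it: SDMA and
 * XGMI SDMA queues share the SDMA MQD manager, everything else
 * (compute, DIQ) uses the CP MQD manager.
 */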
66 static inline
67 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
68 {
69 	if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
70 		return KFD_MQD_TYPE_SDMA;
71 	return KFD_MQD_TYPE_CP;
72 }
73 
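/* A pipe is usable by KFD if at least one of its queues is marked
 * available in the shared cp_queue_bitmap.
 */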
74 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
75 {
76 	int i;
77 	int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec
78 		+ pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe;
79 
80 	/* queue is available for KFD usage if bit is 1 */
81 	for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i)
82 		if (test_bit(pipe_offset + i,
83 			      dqm->dev->kfd->shared_resources.cp_queue_bitmap))
84 			return true;
85 	return false;
86 }
87 
88 unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
89 {
90 	return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap,
91 				KGD_MAX_QUEUES);
92 }
93 
94 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
95 {
96 	return dqm->dev->kfd->shared_resources.num_queue_per_pipe;
97 }
98 
99 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
100 {
101 	return dqm->dev->kfd->shared_resources.num_pipe_per_mec;
102 }
103 
104 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
105 {
106 	return kfd_get_num_sdma_engines(dqm->dev) +
107 		kfd_get_num_xgmi_sdma_engines(dqm->dev);
108 }
109 
110 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
111 {
112 	return kfd_get_num_sdma_engines(dqm->dev) *
113 		dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
114 }
115 
116 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
117 {
118 	return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
119 		dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
120 }
121 
122 static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm)
123 {
124 	return dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap;
125 }
126 
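/* Mark all regular and XGMI SDMA queues of this device as free. */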
127 static void init_sdma_bitmaps(struct device_queue_manager *dqm)
128 {
129 	bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES);
130 	bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm));
131 
132 	bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES);
133 	bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm));
134 }
135 
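/* Program the per-VMID shared-memory aperture registers on every XCC
 * of this node.
 */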
136 void program_sh_mem_settings(struct device_queue_manager *dqm,
137 					struct qcm_process_device *qpd)
138 {
139 	int xcc = 0;
140 
141 	for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
142 		dqm->dev->kfd2kgd->program_sh_mem_settings(
143 						dqm->dev->adev, qpd->vmid,
144 						qpd->sh_mem_config,
145 						qpd->sh_mem_ape1_base,
146 						qpd->sh_mem_ape1_limit,
147 						qpd->sh_mem_bases,
148 						dqm->dev->start_xcc_id + xcc);
149 }
150 
151 static void kfd_hws_hang(struct device_queue_manager *dqm)
152 {
153 	/*
154 	 * Issue a GPU reset if HWS is unresponsive
155 	 */
156 	dqm->is_hws_hang = true;
157 
158 	/* It's possible we're detecting a HWS hang in the
159 	 * middle of a GPU reset. No need to schedule another
160 	 * reset in this case.
161 	 */
162 	if (!dqm->is_resetting)
163 		schedule_work(&dqm->hw_exception_work);
164 }
165 
166 static int convert_to_mes_queue_type(int queue_type)
167 {
168 	int mes_queue_type;
169 
170 	switch (queue_type) {
171 	case KFD_QUEUE_TYPE_COMPUTE:
172 		mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
173 		break;
174 	case KFD_QUEUE_TYPE_SDMA:
175 		mes_queue_type = MES_QUEUE_TYPE_SDMA;
176 		break;
177 	default:
178 		WARN(1, "Invalid queue type %d", queue_type);
179 		mes_queue_type = -EINVAL;
180 		break;
181 	}
182 
183 	return mes_queue_type;
184 }
185 
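/* Describe the queue to the MES firmware and ask it to schedule the
 * queue. A failure here is treated as an HWS hang and triggers a GPU
 * reset via kfd_hws_hang().
 */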
186 static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
187 			 struct qcm_process_device *qpd)
188 {
189 	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
190 	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
191 	struct mes_add_queue_input queue_input;
192 	int r, queue_type;
193 	uint64_t wptr_addr_off;
194 
195 	if (dqm->is_hws_hang)
196 		return -EIO;
197 
198 	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
199 	queue_input.process_id = qpd->pqm->process->pasid;
200 	queue_input.page_table_base_addr = qpd->page_table_base;
201 	queue_input.process_va_start = 0;
202 	queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
203 	/* MES unit for quantum is 100ns */
204 	queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM;  /* Equivalent to 10ms. */
205 	queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
206 	queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
207 	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
208 	queue_input.inprocess_gang_priority = q->properties.priority;
209 	queue_input.gang_global_priority_level =
210 					AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
211 	queue_input.doorbell_offset = q->properties.doorbell_off;
212 	queue_input.mqd_addr = q->gart_mqd_addr;
213 	queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
214 
215 	if (q->wptr_bo) {
216 		wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
217 		queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
218 	}
219 
220 	queue_input.is_kfd_process = 1;
221 	queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL);
222 	queue_input.queue_size = q->properties.queue_size >> 2;
223 
224 	queue_input.paging = false;
225 	queue_input.tba_addr = qpd->tba_addr;
226 	queue_input.tma_addr = qpd->tma_addr;
227 
228 	queue_type = convert_to_mes_queue_type(q->properties.type);
229 	if (queue_type < 0) {
230 		pr_err("Queue type not supported with MES, queue:%d\n",
231 				q->properties.type);
232 		return -EINVAL;
233 	}
234 	queue_input.queue_type = (uint32_t)queue_type;
235 
236 	if (q->gws) {
237 		queue_input.gws_base = 0;
238 		queue_input.gws_size = qpd->num_gws;
239 	}
240 
241 	amdgpu_mes_lock(&adev->mes);
242 	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
243 	amdgpu_mes_unlock(&adev->mes);
244 	if (r) {
245 		pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
246 			q->properties.doorbell_off);
247 		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
248 		kfd_hws_hang(dqm);
249 	}
250 
251 	return r;
252 }
253 
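/* Ask the MES firmware to unschedule a queue, identified by its
 * doorbell offset and gang context address.
 */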
254 static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
255 			struct qcm_process_device *qpd)
256 {
257 	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
258 	int r;
259 	struct mes_remove_queue_input queue_input;
260 
261 	if (dqm->is_hws_hang)
262 		return -EIO;
263 
264 	memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
265 	queue_input.doorbell_offset = q->properties.doorbell_off;
266 	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
267 
268 	amdgpu_mes_lock(&adev->mes);
269 	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
270 	amdgpu_mes_unlock(&adev->mes);
271 
272 	if (r) {
273 		pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
274 			q->properties.doorbell_off);
275 		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
276 		kfd_hws_hang(dqm);
277 	}
278 
279 	return r;
280 }
281 
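/* Remove every active queue of every registered process from MES.
 * Stops at the first failure and returns its error code.
 */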
282 static int remove_all_queues_mes(struct device_queue_manager *dqm)
283 {
284 	struct device_process_node *cur;
285 	struct qcm_process_device *qpd;
286 	struct queue *q;
287 	int retval = 0;
288 
289 	list_for_each_entry(cur, &dqm->queues, list) {
290 		qpd = cur->qpd;
291 		list_for_each_entry(q, &qpd->queues_list, list) {
292 			if (q->properties.is_active) {
293 				retval = remove_queue_mes(dqm, q, qpd);
294 				if (retval) {
295 					pr_err("%s: Failed to remove queue %d for dev %d",
296 						__func__,
297 						q->properties.queue_id,
298 						dqm->dev->id);
299 					return retval;
300 				}
301 			}
302 		}
303 	}
304 
305 	return retval;
306 }
307 
308 static void increment_queue_count(struct device_queue_manager *dqm,
309 				  struct qcm_process_device *qpd,
310 				  struct queue *q)
311 {
312 	dqm->active_queue_count++;
313 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
314 	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
315 		dqm->active_cp_queue_count++;
316 
317 	if (q->properties.is_gws) {
318 		dqm->gws_queue_count++;
319 		qpd->mapped_gws_queue = true;
320 	}
321 }
322 
323 static void decrement_queue_count(struct device_queue_manager *dqm,
324 				  struct qcm_process_device *qpd,
325 				  struct queue *q)
326 {
327 	dqm->active_queue_count--;
328 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
329 	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
330 		dqm->active_cp_queue_count--;
331 
332 	if (q->properties.is_gws) {
333 		dqm->gws_queue_count--;
334 		qpd->mapped_gws_queue = false;
335 	}
336 }
337 
338 /*
339  * Allocate a doorbell ID to this queue.
340  * If restore_id is passed in, make sure the requested ID is valid, then allocate it.
341  */
342 static int allocate_doorbell(struct qcm_process_device *qpd,
343 			     struct queue *q,
344 			     uint32_t const *restore_id)
345 {
346 	struct kfd_node *dev = qpd->dqm->dev;
347 
348 	if (!KFD_IS_SOC15(dev)) {
349 		/* On pre-SOC15 chips we need to use the queue ID to
350 		 * preserve the user mode ABI.
351 		 */
352 
353 		if (restore_id && *restore_id != q->properties.queue_id)
354 			return -EINVAL;
355 
356 		q->doorbell_id = q->properties.queue_id;
357 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
358 			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
359 		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
360 		 * doorbell assignments based on the engine and queue id.
361 		 * The doorbell index distance between RLC (2*i) and (2*i+1)
362 		 * for an SDMA engine is 512.
363 		 */
364 
365 		uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx;
366 
367 		/*
368 		 * q->properties.sdma_engine_id corresponds to the virtual
369 		 * sdma engine number. However, for doorbell allocation,
370 		 * we need the physical sdma engine id in order to get the
371 		 * correct doorbell offset.
372 		 */
373 		uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id *
374 					       get_num_all_sdma_engines(qpd->dqm) +
375 					       q->properties.sdma_engine_id]
376 						+ (q->properties.sdma_queue_id & 1)
377 						* KFD_QUEUE_DOORBELL_MIRROR_OFFSET
378 						+ (q->properties.sdma_queue_id >> 1);
379 
380 		if (restore_id && *restore_id != valid_id)
381 			return -EINVAL;
382 		q->doorbell_id = valid_id;
383 	} else {
384 		/* For CP queues on SOC15 */
385 		if (restore_id) {
386 			/* make sure that ID is free  */
387 			if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
388 				return -EINVAL;
389 
390 			q->doorbell_id = *restore_id;
391 		} else {
392 			/* or reserve a free doorbell ID */
393 			unsigned int found;
394 
395 			found = find_first_zero_bit(qpd->doorbell_bitmap,
396 						KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
397 			if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
398 				pr_debug("No doorbells available");
399 				return -EBUSY;
400 			}
401 			set_bit(found, qpd->doorbell_bitmap);
402 			q->doorbell_id = found;
403 		}
404 	}
405 
406 	q->properties.doorbell_off =
407 		kfd_get_doorbell_dw_offset_in_bar(dev->kfd, qpd_to_pdd(qpd),
408 					  q->doorbell_id);
409 	return 0;
410 }
411 
412 static void deallocate_doorbell(struct qcm_process_device *qpd,
413 				struct queue *q)
414 {
415 	unsigned int old;
416 	struct kfd_node *dev = qpd->dqm->dev;
417 
418 	if (!KFD_IS_SOC15(dev) ||
419 	    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
420 	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
421 		return;
422 
423 	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
424 	WARN_ON(!old);
425 }
426 
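/* Program the trap handler (TBA/TMA) addresses for this VMID on every
 * XCC, if the KGD interface provides the hook.
 */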
427 static void program_trap_handler_settings(struct device_queue_manager *dqm,
428 				struct qcm_process_device *qpd)
429 {
430 	int xcc = 0;
431 
432 	if (dqm->dev->kfd2kgd->program_trap_handler_settings)
433 		for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
434 			dqm->dev->kfd2kgd->program_trap_handler_settings(
435 						dqm->dev->adev, qpd->vmid,
436 						qpd->tba_addr, qpd->tma_addr,
437 						dqm->dev->start_xcc_id + xcc);
438 }
439 
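/* Non-HWS path only: grab the first free KFD VMID, bind it to the
 * process PASID and program the per-VMID state (memory apertures,
 * trap handler, page table base).
 */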
440 static int allocate_vmid(struct device_queue_manager *dqm,
441 			struct qcm_process_device *qpd,
442 			struct queue *q)
443 {
444 	int allocated_vmid = -1, i;
445 
446 	for (i = dqm->dev->vm_info.first_vmid_kfd;
447 			i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
448 		if (!dqm->vmid_pasid[i]) {
449 			allocated_vmid = i;
450 			break;
451 		}
452 	}
453 
454 	if (allocated_vmid < 0) {
455 		pr_err("no more vmid to allocate\n");
456 		return -ENOSPC;
457 	}
458 
459 	pr_debug("vmid allocated: %d\n", allocated_vmid);
460 
461 	dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
462 
463 	set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
464 
465 	qpd->vmid = allocated_vmid;
466 	q->properties.vmid = allocated_vmid;
467 
468 	program_sh_mem_settings(dqm, qpd);
469 
470 	if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled)
471 		program_trap_handler_settings(dqm, qpd);
472 
473 	/* qpd->page_table_base is set earlier when register_process()
474 	 * is called, i.e. when the first queue is created.
475 	 */
476 	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
477 			qpd->vmid,
478 			qpd->page_table_base);
479 	/* invalidate the VM context after pasid and vmid mapping is set up */
480 	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
481 
482 	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
483 		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
484 				qpd->sh_hidden_private_base, qpd->vmid);
485 
486 	return 0;
487 }
488 
489 static int flush_texture_cache_nocpsch(struct kfd_node *kdev,
490 				struct qcm_process_device *qpd)
491 {
492 	const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
493 	int ret;
494 
495 	if (!qpd->ib_kaddr)
496 		return -ENOMEM;
497 
498 	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
499 	if (ret)
500 		return ret;
501 
502 	return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
503 				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
504 				pmf->release_mem_size / sizeof(uint32_t));
505 }
506 
507 static void deallocate_vmid(struct device_queue_manager *dqm,
508 				struct qcm_process_device *qpd,
509 				struct queue *q)
510 {
511 	/* On GFX v7, CP doesn't flush TC at dequeue */
512 	if (q->device->adev->asic_type == CHIP_HAWAII)
513 		if (flush_texture_cache_nocpsch(q->device, qpd))
514 			pr_err("Failed to flush TC\n");
515 
516 	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
517 
518 	/* Release the vmid mapping */
519 	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
520 	dqm->vmid_pasid[qpd->vmid] = 0;
521 
522 	qpd->vmid = 0;
523 	q->properties.vmid = 0;
524 }
525 
526 static int create_queue_nocpsch(struct device_queue_manager *dqm,
527 				struct queue *q,
528 				struct qcm_process_device *qpd,
529 				const struct kfd_criu_queue_priv_data *qd,
530 				const void *restore_mqd, const void *restore_ctl_stack)
531 {
532 	struct mqd_manager *mqd_mgr;
533 	int retval;
534 
535 	dqm_lock(dqm);
536 
537 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
538 		pr_warn("Can't create new usermode queue because %d queues were already created\n",
539 				dqm->total_queue_count);
540 		retval = -EPERM;
541 		goto out_unlock;
542 	}
543 
544 	if (list_empty(&qpd->queues_list)) {
545 		retval = allocate_vmid(dqm, qpd, q);
546 		if (retval)
547 			goto out_unlock;
548 	}
549 	q->properties.vmid = qpd->vmid;
550 	/*
551 	 * Eviction state logic: mark all queues as evicted, even ones
552 	 * not currently active. Restoring inactive queues later only
553 	 * updates the is_evicted flag but is a no-op otherwise.
554 	 */
555 	q->properties.is_evicted = !!qpd->evicted;
556 
557 	q->properties.tba_addr = qpd->tba_addr;
558 	q->properties.tma_addr = qpd->tma_addr;
559 
560 	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
561 			q->properties.type)];
562 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
563 		retval = allocate_hqd(dqm, q);
564 		if (retval)
565 			goto deallocate_vmid;
566 		pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
567 			q->pipe, q->queue);
568 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
569 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
570 		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
571 		if (retval)
572 			goto deallocate_vmid;
573 		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
574 	}
575 
576 	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
577 	if (retval)
578 		goto out_deallocate_hqd;
579 
580 	/* Temporarily release dqm lock to avoid a circular lock dependency */
581 	dqm_unlock(dqm);
582 	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
583 	dqm_lock(dqm);
584 
585 	if (!q->mqd_mem_obj) {
586 		retval = -ENOMEM;
587 		goto out_deallocate_doorbell;
588 	}
589 
590 	if (qd)
591 		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
592 				     &q->properties, restore_mqd, restore_ctl_stack,
593 				     qd->ctl_stack_size);
594 	else
595 		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
596 					&q->gart_mqd_addr, &q->properties);
597 
598 	if (q->properties.is_active) {
599 		if (!dqm->sched_running) {
600 			WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
601 			goto add_queue_to_list;
602 		}
603 
604 		if (WARN(q->process->mm != current->mm,
605 					"should only run in user thread"))
606 			retval = -EFAULT;
607 		else
608 			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
609 					q->queue, &q->properties, current->mm);
610 		if (retval)
611 			goto out_free_mqd;
612 	}
613 
614 add_queue_to_list:
615 	list_add(&q->list, &qpd->queues_list);
616 	qpd->queue_count++;
617 	if (q->properties.is_active)
618 		increment_queue_count(dqm, qpd, q);
619 
620 	/*
621 	 * Unconditionally increment this counter, regardless of the queue's
622 	 * type or whether the queue is active.
623 	 */
624 	dqm->total_queue_count++;
625 	pr_debug("Total of %d queues are accountable so far\n",
626 			dqm->total_queue_count);
627 	goto out_unlock;
628 
629 out_free_mqd:
630 	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
631 out_deallocate_doorbell:
632 	deallocate_doorbell(qpd, q);
633 out_deallocate_hqd:
634 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
635 		deallocate_hqd(dqm, q);
636 	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
637 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
638 		deallocate_sdma_queue(dqm, q);
639 deallocate_vmid:
640 	if (list_empty(&qpd->queues_list))
641 		deallocate_vmid(dqm, qpd, q);
642 out_unlock:
643 	dqm_unlock(dqm);
644 	return retval;
645 }
646 
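/* Reserve a free HQD slot (pipe/queue pair). Pipes are scanned
 * round-robin starting at next_pipe_to_allocate so queues spread
 * evenly across pipes.
 */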
647 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
648 {
649 	bool set;
650 	int pipe, bit, i;
651 
652 	set = false;
653 
654 	for (pipe = dqm->next_pipe_to_allocate, i = 0;
655 			i < get_pipes_per_mec(dqm);
656 			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
657 
658 		if (!is_pipe_enabled(dqm, 0, pipe))
659 			continue;
660 
661 		if (dqm->allocated_queues[pipe] != 0) {
662 			bit = ffs(dqm->allocated_queues[pipe]) - 1;
663 			dqm->allocated_queues[pipe] &= ~(1 << bit);
664 			q->pipe = pipe;
665 			q->queue = bit;
666 			set = true;
667 			break;
668 		}
669 	}
670 
671 	if (!set)
672 		return -EBUSY;
673 
674 	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
675 	/* horizontal hqd allocation */
676 	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
677 
678 	return 0;
679 }
680 
681 static inline void deallocate_hqd(struct device_queue_manager *dqm,
682 				struct queue *q)
683 {
684 	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
685 }
686 
687 #define SQ_IND_CMD_CMD_KILL		0x00000003
688 #define SQ_IND_CMD_MODE_BROADCAST	0x00000001
689 
690 static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p)
691 {
692 	int status = 0;
693 	unsigned int vmid;
694 	uint16_t queried_pasid;
695 	union SQ_CMD_BITS reg_sq_cmd;
696 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
697 	struct kfd_process_device *pdd;
698 	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
699 	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
700 	int xcc = 0;
701 
702 	reg_sq_cmd.u32All = 0;
703 	reg_gfx_index.u32All = 0;
704 
705 	pr_debug("Killing all process wavefronts\n");
706 
707 	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
708 		pr_err("no vmid pasid mapping supported \n");
709 		return -EOPNOTSUPP;
710 	}
711 
712 	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
713 	 * ATC_VMID15_PASID_MAPPING
714 	 * to check which VMID the current process is mapped to.
715 	 */
716 
717 	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
718 		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
719 				(dev->adev, vmid, &queried_pasid);
720 
721 		if (status && queried_pasid == p->pasid) {
722 			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
723 					vmid, p->pasid);
724 			break;
725 		}
726 	}
727 
728 	if (vmid > last_vmid_to_scan) {
729 		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
730 		return -EFAULT;
731 	}
732 
733 	/* take the VMID for that process the safe way, via its PDD */
734 	pdd = kfd_get_process_device_data(dev, p);
735 	if (!pdd)
736 		return -EFAULT;
737 
738 	reg_gfx_index.bits.sh_broadcast_writes = 1;
739 	reg_gfx_index.bits.se_broadcast_writes = 1;
740 	reg_gfx_index.bits.instance_broadcast_writes = 1;
741 	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
742 	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
743 	reg_sq_cmd.bits.vm_id = vmid;
744 
745 	for (xcc = 0; xcc < dev->num_xcc_per_node; xcc++)
746 		dev->kfd2kgd->wave_control_execute(dev->adev,
747 					reg_gfx_index.u32All,
748 					reg_sq_cmd.u32All,
749 					dev->start_xcc_id + xcc);
750 
751 	return 0;
752 }
753 
754 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
755  * to avoid unsynchronized access
756  */
757 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
758 				struct qcm_process_device *qpd,
759 				struct queue *q)
760 {
761 	int retval;
762 	struct mqd_manager *mqd_mgr;
763 
764 	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
765 			q->properties.type)];
766 
767 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
768 		deallocate_hqd(dqm, q);
769 	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
770 		deallocate_sdma_queue(dqm, q);
771 	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
772 		deallocate_sdma_queue(dqm, q);
773 	else {
774 		pr_debug("q->properties.type %d is invalid\n",
775 				q->properties.type);
776 		return -EINVAL;
777 	}
778 	dqm->total_queue_count--;
779 
780 	deallocate_doorbell(qpd, q);
781 
782 	if (!dqm->sched_running) {
783 		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
784 		return 0;
785 	}
786 
787 	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
788 				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
789 				KFD_UNMAP_LATENCY_MS,
790 				q->pipe, q->queue);
791 	if (retval == -ETIME)
792 		qpd->reset_wavefronts = true;
793 
794 	list_del(&q->list);
795 	if (list_empty(&qpd->queues_list)) {
796 		if (qpd->reset_wavefronts) {
797 			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
798 					dqm->dev);
799 			/* dbgdev_wave_reset_wavefronts has to be called before
800 			 * deallocate_vmid(), i.e. when vmid is still in use.
801 			 */
802 			dbgdev_wave_reset_wavefronts(dqm->dev,
803 					qpd->pqm->process);
804 			qpd->reset_wavefronts = false;
805 		}
806 
807 		deallocate_vmid(dqm, qpd, q);
808 	}
809 	qpd->queue_count--;
810 	if (q->properties.is_active)
811 		decrement_queue_count(dqm, qpd, q);
812 
813 	return retval;
814 }
815 
816 static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
817 				struct qcm_process_device *qpd,
818 				struct queue *q)
819 {
820 	int retval;
821 	uint64_t sdma_val = 0;
822 	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
823 	struct mqd_manager *mqd_mgr =
824 		dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
825 
826 	/* Get the SDMA queue stats */
827 	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
828 	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
829 		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
830 							&sdma_val);
831 		if (retval)
832 			pr_err("Failed to read SDMA queue counter for queue: %d\n",
833 				q->properties.queue_id);
834 	}
835 
836 	dqm_lock(dqm);
837 	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
838 	if (!retval)
839 		pdd->sdma_past_activity_counter += sdma_val;
840 	dqm_unlock(dqm);
841 
842 	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
843 
844 	return retval;
845 }
846 
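/* Update a queue's MQD from its properties. The queue is first
 * unmapped (HWS) or its MQD destroyed (non-HWS), the MQD is updated,
 * and the queue is remapped or reloaded if it is still active.
 */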
847 static int update_queue(struct device_queue_manager *dqm, struct queue *q,
848 			struct mqd_update_info *minfo)
849 {
850 	int retval = 0;
851 	struct mqd_manager *mqd_mgr;
852 	struct kfd_process_device *pdd;
853 	bool prev_active = false;
854 
855 	dqm_lock(dqm);
856 	pdd = kfd_get_process_device_data(q->device, q->process);
857 	if (!pdd) {
858 		retval = -ENODEV;
859 		goto out_unlock;
860 	}
861 	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
862 			q->properties.type)];
863 
864 	/* Save previous activity state for counters */
865 	prev_active = q->properties.is_active;
866 
867 	/* Make sure the queue is unmapped before updating the MQD */
868 	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
869 		if (!dqm->dev->kfd->shared_resources.enable_mes)
870 			retval = unmap_queues_cpsch(dqm,
871 						    KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
872 		else if (prev_active)
873 			retval = remove_queue_mes(dqm, q, &pdd->qpd);
874 
875 		if (retval) {
876 			pr_err("unmap queue failed\n");
877 			goto out_unlock;
878 		}
879 	} else if (prev_active &&
880 		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
881 		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
882 		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
883 
884 		if (!dqm->sched_running) {
885 			WARN_ONCE(1, "Update non-HWS queue while stopped\n");
886 			goto out_unlock;
887 		}
888 
889 		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
890 				(dqm->dev->kfd->cwsr_enabled ?
891 				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
892 				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
893 				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
894 		if (retval) {
895 			pr_err("destroy mqd failed\n");
896 			goto out_unlock;
897 		}
898 	}
899 
900 	mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);
901 
902 	/*
903 	 * check active state vs. the previous state and modify
904 	 * counter accordingly. map_queues_cpsch uses the
905 	 * dqm->active_queue_count to determine whether a new runlist must be
906 	 * uploaded.
907 	 */
908 	if (q->properties.is_active && !prev_active) {
909 		increment_queue_count(dqm, &pdd->qpd, q);
910 	} else if (!q->properties.is_active && prev_active) {
911 		decrement_queue_count(dqm, &pdd->qpd, q);
912 	} else if (q->gws && !q->properties.is_gws) {
913 		if (q->properties.is_active) {
914 			dqm->gws_queue_count++;
915 			pdd->qpd.mapped_gws_queue = true;
916 		}
917 		q->properties.is_gws = true;
918 	} else if (!q->gws && q->properties.is_gws) {
919 		if (q->properties.is_active) {
920 			dqm->gws_queue_count--;
921 			pdd->qpd.mapped_gws_queue = false;
922 		}
923 		q->properties.is_gws = false;
924 	}
925 
926 	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
927 		if (!dqm->dev->kfd->shared_resources.enable_mes)
928 			retval = map_queues_cpsch(dqm);
929 		else if (q->properties.is_active)
930 			retval = add_queue_mes(dqm, q, &pdd->qpd);
931 	} else if (q->properties.is_active &&
932 		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
933 		  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
934 		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
935 		if (WARN(q->process->mm != current->mm,
936 			 "should only run in user thread"))
937 			retval = -EFAULT;
938 		else
939 			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
940 						   q->pipe, q->queue,
941 						   &q->properties, current->mm);
942 	}
943 
944 out_unlock:
945 	dqm_unlock(dqm);
946 	return retval;
947 }
948 
949 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
950 					struct qcm_process_device *qpd)
951 {
952 	struct queue *q;
953 	struct mqd_manager *mqd_mgr;
954 	struct kfd_process_device *pdd;
955 	int retval, ret = 0;
956 
957 	dqm_lock(dqm);
958 	if (qpd->evicted++ > 0) /* already evicted, do nothing */
959 		goto out;
960 
961 	pdd = qpd_to_pdd(qpd);
962 	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
963 			    pdd->process->pasid);
964 
965 	pdd->last_evict_timestamp = get_jiffies_64();
966 	/* Mark all queues as evicted. Deactivate all active queues on
967 	 * the qpd.
968 	 */
969 	list_for_each_entry(q, &qpd->queues_list, list) {
970 		q->properties.is_evicted = true;
971 		if (!q->properties.is_active)
972 			continue;
973 
974 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
975 				q->properties.type)];
976 		q->properties.is_active = false;
977 		decrement_queue_count(dqm, qpd, q);
978 
979 		if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
980 			continue;
981 
982 		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
983 				(dqm->dev->kfd->cwsr_enabled ?
984 				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
985 				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
986 				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
987 		if (retval && !ret)
988 			/* Return the first error, but keep going to
989 			 * maintain a consistent eviction state
990 			 */
991 			ret = retval;
992 	}
993 
994 out:
995 	dqm_unlock(dqm);
996 	return ret;
997 }
998 
999 static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
1000 				      struct qcm_process_device *qpd)
1001 {
1002 	struct queue *q;
1003 	struct kfd_process_device *pdd;
1004 	int retval = 0;
1005 
1006 	dqm_lock(dqm);
1007 	if (qpd->evicted++ > 0) /* already evicted, do nothing */
1008 		goto out;
1009 
1010 	pdd = qpd_to_pdd(qpd);
1011 	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
1012 			    pdd->process->pasid);
1013 
1014 	/* Mark all queues as evicted. Deactivate all active queues on
1015 	 * the qpd.
1016 	 */
1017 	list_for_each_entry(q, &qpd->queues_list, list) {
1018 		q->properties.is_evicted = true;
1019 		if (!q->properties.is_active)
1020 			continue;
1021 
1022 		q->properties.is_active = false;
1023 		decrement_queue_count(dqm, qpd, q);
1024 
1025 		if (dqm->dev->kfd->shared_resources.enable_mes) {
1026 			retval = remove_queue_mes(dqm, q, qpd);
1027 			if (retval) {
1028 				pr_err("Failed to evict queue %d\n",
1029 					q->properties.queue_id);
1030 				goto out;
1031 			}
1032 		}
1033 	}
1034 	pdd->last_evict_timestamp = get_jiffies_64();
1035 	if (!dqm->dev->kfd->shared_resources.enable_mes)
1036 		retval = execute_queues_cpsch(dqm,
1037 					      qpd->is_debug ?
1038 					      KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
1039 					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1040 
1041 out:
1042 	dqm_unlock(dqm);
1043 	return retval;
1044 }
1045 
1046 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
1047 					  struct qcm_process_device *qpd)
1048 {
1049 	struct mm_struct *mm = NULL;
1050 	struct queue *q;
1051 	struct mqd_manager *mqd_mgr;
1052 	struct kfd_process_device *pdd;
1053 	uint64_t pd_base;
1054 	uint64_t eviction_duration;
1055 	int retval, ret = 0;
1056 
1057 	pdd = qpd_to_pdd(qpd);
1058 	/* Retrieve PD base */
1059 	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
1060 
1061 	dqm_lock(dqm);
1062 	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
1063 		goto out;
1064 	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
1065 		qpd->evicted--;
1066 		goto out;
1067 	}
1068 
1069 	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
1070 			    pdd->process->pasid);
1071 
1072 	/* Update PD Base in QPD */
1073 	qpd->page_table_base = pd_base;
1074 	pr_debug("Updated PD address to 0x%llx\n", pd_base);
1075 
1076 	if (!list_empty(&qpd->queues_list)) {
1077 		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
1078 				dqm->dev->adev,
1079 				qpd->vmid,
1080 				qpd->page_table_base);
1081 		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
1082 	}
1083 
1084 	/* Take a safe reference to the mm_struct, which may otherwise
1085 	 * disappear even while the kfd_process is still referenced.
1086 	 */
1087 	mm = get_task_mm(pdd->process->lead_thread);
1088 	if (!mm) {
1089 		ret = -EFAULT;
1090 		goto out;
1091 	}
1092 
1093 	/* Clear the eviction flags. Activate queues unless they are
1094 	 * inactive for some other reason.
1095 	 */
1096 	list_for_each_entry(q, &qpd->queues_list, list) {
1097 		q->properties.is_evicted = false;
1098 		if (!QUEUE_IS_ACTIVE(q->properties))
1099 			continue;
1100 
1101 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1102 				q->properties.type)];
1103 		q->properties.is_active = true;
1104 		increment_queue_count(dqm, qpd, q);
1105 
1106 		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
1107 			continue;
1108 
1109 		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
1110 				       q->queue, &q->properties, mm);
1111 		if (retval && !ret)
1112 			/* Return the first error, but keep going to
1113 			 * maintain a consistent eviction state
1114 			 */
1115 			ret = retval;
1116 	}
1117 	qpd->evicted = 0;
1118 	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
1119 	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
1120 out:
1121 	if (mm)
1122 		mmput(mm);
1123 	dqm_unlock(dqm);
1124 	return ret;
1125 }
1126 
1127 static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
1128 					struct qcm_process_device *qpd)
1129 {
1130 	struct queue *q;
1131 	struct kfd_process_device *pdd;
1132 	uint64_t pd_base;
1133 	uint64_t eviction_duration;
1134 	int retval = 0;
1135 
1136 	pdd = qpd_to_pdd(qpd);
1137 	/* Retrieve PD base */
1138 	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
1139 
1140 	dqm_lock(dqm);
1141 	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
1142 		goto out;
1143 	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
1144 		qpd->evicted--;
1145 		goto out;
1146 	}
1147 
1148 	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
1149 			    pdd->process->pasid);
1150 
1151 	/* Update PD Base in QPD */
1152 	qpd->page_table_base = pd_base;
1153 	pr_debug("Updated PD address to 0x%llx\n", pd_base);
1154 
1155 	/* activate all active queues on the qpd */
1156 	list_for_each_entry(q, &qpd->queues_list, list) {
1157 		q->properties.is_evicted = false;
1158 		if (!QUEUE_IS_ACTIVE(q->properties))
1159 			continue;
1160 
1161 		q->properties.is_active = true;
1162 		increment_queue_count(dqm, &pdd->qpd, q);
1163 
1164 		if (dqm->dev->kfd->shared_resources.enable_mes) {
1165 			retval = add_queue_mes(dqm, q, qpd);
1166 			if (retval) {
1167 				pr_err("Failed to restore queue %d\n",
1168 					q->properties.queue_id);
1169 				goto out;
1170 			}
1171 		}
1172 	}
1173 	if (!dqm->dev->kfd->shared_resources.enable_mes)
1174 		retval = execute_queues_cpsch(dqm,
1175 					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1176 	qpd->evicted = 0;
1177 	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
1178 	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
1179 out:
1180 	dqm_unlock(dqm);
1181 	return retval;
1182 }
1183 
1184 static int register_process(struct device_queue_manager *dqm,
1185 					struct qcm_process_device *qpd)
1186 {
1187 	struct device_process_node *n;
1188 	struct kfd_process_device *pdd;
1189 	uint64_t pd_base;
1190 	int retval;
1191 
1192 	n = kzalloc(sizeof(*n), GFP_KERNEL);
1193 	if (!n)
1194 		return -ENOMEM;
1195 
1196 	n->qpd = qpd;
1197 
1198 	pdd = qpd_to_pdd(qpd);
1199 	/* Retrieve PD base */
1200 	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
1201 
1202 	dqm_lock(dqm);
1203 	list_add(&n->list, &dqm->queues);
1204 
1205 	/* Update PD Base in QPD */
1206 	qpd->page_table_base = pd_base;
1207 	pr_debug("Updated PD address to 0x%llx\n", pd_base);
1208 
1209 	retval = dqm->asic_ops.update_qpd(dqm, qpd);
1210 
1211 	dqm->processes_count++;
1212 
1213 	dqm_unlock(dqm);
1214 
1215 	/* Outside the DQM lock because under the DQM lock we can't do
1216 	 * reclaim or take other locks that others hold while reclaiming.
1217 	 */
1218 	kfd_inc_compute_active(dqm->dev);
1219 
1220 	return retval;
1221 }
1222 
1223 static int unregister_process(struct device_queue_manager *dqm,
1224 					struct qcm_process_device *qpd)
1225 {
1226 	int retval;
1227 	struct device_process_node *cur, *next;
1228 
1229 	pr_debug("qpd->queues_list is %s\n",
1230 			list_empty(&qpd->queues_list) ? "empty" : "not empty");
1231 
1232 	retval = 0;
1233 	dqm_lock(dqm);
1234 
1235 	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
1236 		if (qpd == cur->qpd) {
1237 			list_del(&cur->list);
1238 			kfree(cur);
1239 			dqm->processes_count--;
1240 			goto out;
1241 		}
1242 	}
1243 	/* qpd not found in dqm list */
1244 	retval = 1;
1245 out:
1246 	dqm_unlock(dqm);
1247 
1248 	/* Outside the DQM lock because under the DQM lock we can't do
1249 	 * reclaim or take other locks that others hold while reclaiming.
1250 	 */
1251 	if (!retval)
1252 		kfd_dec_compute_active(dqm->dev);
1253 
1254 	return retval;
1255 }
1256 
1257 static int
1258 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
1259 			unsigned int vmid)
1260 {
1261 	int xcc = 0, ret;
1262 
1263 	for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++) {
1264 		ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
1265 						dqm->dev->adev, pasid, vmid,
1266 						dqm->dev->start_xcc_id + xcc);
1267 		if (ret)
1268 			break;
1269 	}
1270 
1271 	return ret;
1272 }
1273 
1274 static void init_interrupts(struct device_queue_manager *dqm)
1275 {
1276 	unsigned int i, xcc;
1277 
1278 	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) {
1279 		if (is_pipe_enabled(dqm, 0, i)) {
1280 			for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
1281 				dqm->dev->kfd2kgd->init_interrupts(
1282 							dqm->dev->adev, i,
1283 							dqm->dev->start_xcc_id +
1284 							xcc);
1285 		}
1286 	}
1287 }
1288 
1289 static int initialize_nocpsch(struct device_queue_manager *dqm)
1290 {
1291 	int pipe, queue;
1292 
1293 	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1294 
1295 	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
1296 					sizeof(unsigned int), GFP_KERNEL);
1297 	if (!dqm->allocated_queues)
1298 		return -ENOMEM;
1299 
1300 	mutex_init(&dqm->lock_hidden);
1301 	INIT_LIST_HEAD(&dqm->queues);
1302 	dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
1303 	dqm->active_cp_queue_count = 0;
1304 	dqm->gws_queue_count = 0;
1305 
1306 	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
1307 		int pipe_offset = pipe * get_queues_per_pipe(dqm);
1308 
1309 		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
1310 			if (test_bit(pipe_offset + queue,
1311 				     dqm->dev->kfd->shared_resources.cp_queue_bitmap))
1312 				dqm->allocated_queues[pipe] |= 1 << queue;
1313 	}
1314 
1315 	memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
1316 
1317 	init_sdma_bitmaps(dqm);
1318 
1319 	return 0;
1320 }
1321 
1322 static void uninitialize(struct device_queue_manager *dqm)
1323 {
1324 	int i;
1325 
1326 	WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
1327 
1328 	kfree(dqm->allocated_queues);
1329 	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
1330 		kfree(dqm->mqd_mgrs[i]);
1331 	mutex_destroy(&dqm->lock_hidden);
1332 }
1333 
1334 static int start_nocpsch(struct device_queue_manager *dqm)
1335 {
1336 	int r = 0;
1337 
1338 	pr_info("SW scheduler is used");
1339 	init_interrupts(dqm);
1340 
1341 	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
1342 		r = pm_init(&dqm->packet_mgr, dqm);
1343 	if (!r)
1344 		dqm->sched_running = true;
1345 
1346 	return r;
1347 }
1348 
1349 static int stop_nocpsch(struct device_queue_manager *dqm)
1350 {
1351 	dqm_lock(dqm);
1352 	if (!dqm->sched_running) {
1353 		dqm_unlock(dqm);
1354 		return 0;
1355 	}
1356 
1357 	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
1358 		pm_uninit(&dqm->packet_mgr, false);
1359 	dqm->sched_running = false;
1360 	dqm_unlock(dqm);
1361 
1362 	return 0;
1363 }
1364 
1365 static void pre_reset(struct device_queue_manager *dqm)
1366 {
1367 	dqm_lock(dqm);
1368 	dqm->is_resetting = true;
1369 	dqm_unlock(dqm);
1370 }
1371 
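/* Allocate an SDMA or XGMI SDMA queue ID from the matching bitmap,
 * optionally re-using a restored ID, and derive the engine and
 * per-engine queue IDs from it.
 */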
1372 static int allocate_sdma_queue(struct device_queue_manager *dqm,
1373 				struct queue *q, const uint32_t *restore_sdma_id)
1374 {
1375 	int bit;
1376 
1377 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1378 		if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
1379 			pr_err("No more SDMA queue to allocate\n");
1380 			return -ENOMEM;
1381 		}
1382 
1383 		if (restore_sdma_id) {
1384 			/* Re-use existing sdma_id */
1385 			if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) {
1386 				pr_err("SDMA queue already in use\n");
1387 				return -EBUSY;
1388 			}
1389 			clear_bit(*restore_sdma_id, dqm->sdma_bitmap);
1390 			q->sdma_id = *restore_sdma_id;
1391 		} else {
1392 			/* Find first available sdma_id */
1393 			bit = find_first_bit(dqm->sdma_bitmap,
1394 					     get_num_sdma_queues(dqm));
1395 			clear_bit(bit, dqm->sdma_bitmap);
1396 			q->sdma_id = bit;
1397 		}
1398 
1399 		q->properties.sdma_engine_id =
1400 			q->sdma_id % kfd_get_num_sdma_engines(dqm->dev);
1401 		q->properties.sdma_queue_id = q->sdma_id /
1402 				kfd_get_num_sdma_engines(dqm->dev);
1403 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1404 		if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
1405 			pr_err("No more XGMI SDMA queue to allocate\n");
1406 			return -ENOMEM;
1407 		}
1408 		if (restore_sdma_id) {
1409 			/* Re-use existing sdma_id */
1410 			if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) {
1411 				pr_err("SDMA queue already in use\n");
1412 				return -EBUSY;
1413 			}
1414 			clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap);
1415 			q->sdma_id = *restore_sdma_id;
1416 		} else {
1417 			bit = find_first_bit(dqm->xgmi_sdma_bitmap,
1418 					     get_num_xgmi_sdma_queues(dqm));
1419 			clear_bit(bit, dqm->xgmi_sdma_bitmap);
1420 			q->sdma_id = bit;
1421 		}
1422 		/* sdma_engine_id is an SDMA ID that counts both
1423 		 * PCIe-optimized and XGMI-optimized SDMA
1424 		 * engines. The calculation below assumes
1425 		 * the first N engines are always the
1426 		 * PCIe-optimized ones.
1427 		 */
1428 		q->properties.sdma_engine_id =
1429 			kfd_get_num_sdma_engines(dqm->dev) +
1430 			q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
1431 		q->properties.sdma_queue_id = q->sdma_id /
1432 			kfd_get_num_xgmi_sdma_engines(dqm->dev);
1433 	}
1434 
1435 	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
1436 	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
1437 
1438 	return 0;
1439 }
1440 
1441 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
1442 				struct queue *q)
1443 {
1444 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1445 		if (q->sdma_id >= get_num_sdma_queues(dqm))
1446 			return;
1447 		set_bit(q->sdma_id, dqm->sdma_bitmap);
1448 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1449 		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
1450 			return;
1451 		set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap);
1452 	}
1453 }
1454 
1455 /*
1456  * Device Queue Manager implementation for cp scheduler
1457  */
1458 
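/* Tell the HW scheduler which VMIDs and which MEC1 queues it may use
 * by sending a SET_RESOURCES packet.
 */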
1459 static int set_sched_resources(struct device_queue_manager *dqm)
1460 {
1461 	int i, mec;
1462 	struct scheduling_resources res;
1463 
1464 	res.vmid_mask = dqm->dev->compute_vmid_bitmap;
1465 
1466 	res.queue_mask = 0;
1467 	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
1468 		mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
1469 			/ dqm->dev->kfd->shared_resources.num_pipe_per_mec;
1470 
1471 		if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap))
1472 			continue;
1473 
1474 		/* only acquire queues from the first MEC */
1475 		if (mec > 0)
1476 			continue;
1477 
1478 		/* This situation may be hit in the future if a new HW
1479 		 * generation exposes more than 64 queues. If so, the
1480 		 * definition of res.queue_mask needs updating
1481 		 */
1482 		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
1483 			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
1484 			break;
1485 		}
1486 
1487 		res.queue_mask |= 1ull
1488 			<< amdgpu_queue_mask_bit_to_set_resource_bit(
1489 				dqm->dev->adev, i);
1490 	}
1491 	res.gws_mask = ~0ull;
1492 	res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
1493 
1494 	pr_debug("Scheduling resources:\n"
1495 			"vmid mask: 0x%8X\n"
1496 			"queue mask: 0x%8llX\n",
1497 			res.vmid_mask, res.queue_mask);
1498 
1499 	return pm_send_set_resources(&dqm->packet_mgr, &res);
1500 }
1501 
1502 static int initialize_cpsch(struct device_queue_manager *dqm)
1503 {
1504 	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1505 
1506 	mutex_init(&dqm->lock_hidden);
1507 	INIT_LIST_HEAD(&dqm->queues);
1508 	dqm->active_queue_count = dqm->processes_count = 0;
1509 	dqm->active_cp_queue_count = 0;
1510 	dqm->gws_queue_count = 0;
1511 	dqm->active_runlist = false;
1512 	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
1513 
1514 	init_sdma_bitmaps(dqm);
1515 
1516 	return 0;
1517 }
1518 
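/* Start the HW scheduler: initialize the packet manager and
 * scheduling resources (unless MES is used), allocate the preemption
 * fence in GTT, enable interrupts and map the existing queues.
 */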
1519 static int start_cpsch(struct device_queue_manager *dqm)
1520 {
1521 	int retval;
1522 
1523 	retval = 0;
1524 
1525 	dqm_lock(dqm);
1526 
1527 	if (!dqm->dev->kfd->shared_resources.enable_mes) {
1528 		retval = pm_init(&dqm->packet_mgr, dqm);
1529 		if (retval)
1530 			goto fail_packet_manager_init;
1531 
1532 		retval = set_sched_resources(dqm);
1533 		if (retval)
1534 			goto fail_set_sched_resources;
1535 	}
1536 	pr_debug("Allocating fence memory\n");
1537 
1538 	/* allocate fence memory on the gart */
1539 	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
1540 					&dqm->fence_mem);
1541 
1542 	if (retval)
1543 		goto fail_allocate_vidmem;
1544 
1545 	dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
1546 	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
1547 
1548 	init_interrupts(dqm);
1549 
1550 	/* clear hang status when the driver tries to start the hw scheduler */
1551 	dqm->is_hws_hang = false;
1552 	dqm->is_resetting = false;
1553 	dqm->sched_running = true;
1554 	if (!dqm->dev->kfd->shared_resources.enable_mes)
1555 		execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1556 	dqm_unlock(dqm);
1557 
1558 	return 0;
1559 fail_allocate_vidmem:
1560 fail_set_sched_resources:
1561 	if (!dqm->dev->kfd->shared_resources.enable_mes)
1562 		pm_uninit(&dqm->packet_mgr, false);
1563 fail_packet_manager_init:
1564 	dqm_unlock(dqm);
1565 	return retval;
1566 }
1567 
1568 static int stop_cpsch(struct device_queue_manager *dqm)
1569 {
1570 	bool hanging;
1571 
1572 	dqm_lock(dqm);
1573 	if (!dqm->sched_running) {
1574 		dqm_unlock(dqm);
1575 		return 0;
1576 	}
1577 
1578 	if (!dqm->is_hws_hang) {
1579 		if (!dqm->dev->kfd->shared_resources.enable_mes)
1580 			unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
1581 		else
1582 			remove_all_queues_mes(dqm);
1583 	}
1584 
1585 	hanging = dqm->is_hws_hang || dqm->is_resetting;
1586 	dqm->sched_running = false;
1587 
1588 	if (!dqm->dev->kfd->shared_resources.enable_mes)
1589 		pm_release_ib(&dqm->packet_mgr);
1590 
1591 	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
1592 	if (!dqm->dev->kfd->shared_resources.enable_mes)
1593 		pm_uninit(&dqm->packet_mgr, hanging);
1594 	dqm_unlock(dqm);
1595 
1596 	return 0;
1597 }
1598 
1599 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
1600 					struct kernel_queue *kq,
1601 					struct qcm_process_device *qpd)
1602 {
1603 	dqm_lock(dqm);
1604 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1605 		pr_warn("Can't create new kernel queue because %d queues were already created\n",
1606 				dqm->total_queue_count);
1607 		dqm_unlock(dqm);
1608 		return -EPERM;
1609 	}
1610 
1611 	/*
1612 	 * Unconditionally increment this counter, regardless of the queue's
1613 	 * type or whether the queue is active.
1614 	 */
1615 	dqm->total_queue_count++;
1616 	pr_debug("Total of %d queues are accountable so far\n",
1617 			dqm->total_queue_count);
1618 
1619 	list_add(&kq->list, &qpd->priv_queue_list);
1620 	increment_queue_count(dqm, qpd, kq->queue);
1621 	qpd->is_debug = true;
1622 	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1623 	dqm_unlock(dqm);
1624 
1625 	return 0;
1626 }
1627 
1628 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
1629 					struct kernel_queue *kq,
1630 					struct qcm_process_device *qpd)
1631 {
1632 	dqm_lock(dqm);
1633 	list_del(&kq->list);
1634 	decrement_queue_count(dqm, qpd, kq->queue);
1635 	qpd->is_debug = false;
1636 	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1637 	/*
1638 	 * Unconditionally decrement this counter, regardless of the queue's
1639 	 * type.
1640 	 */
1641 	dqm->total_queue_count--;
1642 	pr_debug("Total of %d queues are accountable so far\n",
1643 			dqm->total_queue_count);
1644 	dqm_unlock(dqm);
1645 }
1646 
1647 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1648 			struct qcm_process_device *qpd,
1649 			const struct kfd_criu_queue_priv_data *qd,
1650 			const void *restore_mqd, const void *restore_ctl_stack)
1651 {
1652 	int retval;
1653 	struct mqd_manager *mqd_mgr;
1654 
1655 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1656 		pr_warn("Can't create new usermode queue because %d queues were already created\n",
1657 				dqm->total_queue_count);
1658 		retval = -EPERM;
1659 		goto out;
1660 	}
1661 
1662 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1663 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1664 		dqm_lock(dqm);
1665 		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
1666 		dqm_unlock(dqm);
1667 		if (retval)
1668 			goto out;
1669 	}
1670 
1671 	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
1672 	if (retval)
1673 		goto out_deallocate_sdma_queue;
1674 
1675 	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1676 			q->properties.type)];
1677 
1678 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1679 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1680 		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1681 	q->properties.tba_addr = qpd->tba_addr;
1682 	q->properties.tma_addr = qpd->tma_addr;
1683 	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
1684 	if (!q->mqd_mem_obj) {
1685 		retval = -ENOMEM;
1686 		goto out_deallocate_doorbell;
1687 	}
1688 
1689 	dqm_lock(dqm);
1690 	/*
1691 	 * Eviction state logic: mark all queues as evicted, even ones
1692 	 * not currently active. Restoring inactive queues later only
1693 	 * updates the is_evicted flag but is a no-op otherwise.
1694 	 */
1695 	q->properties.is_evicted = !!qpd->evicted;
1696 
1697 	if (qd)
1698 		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
1699 				     &q->properties, restore_mqd, restore_ctl_stack,
1700 				     qd->ctl_stack_size);
1701 	else
1702 		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
1703 					&q->gart_mqd_addr, &q->properties);
1704 
1705 	list_add(&q->list, &qpd->queues_list);
1706 	qpd->queue_count++;
1707 
1708 	if (q->properties.is_active) {
1709 		increment_queue_count(dqm, qpd, q);
1710 
1711 		if (!dqm->dev->kfd->shared_resources.enable_mes)
1712 			retval = execute_queues_cpsch(dqm,
1713 					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1714 		else
1715 			retval = add_queue_mes(dqm, q, qpd);
1716 		if (retval)
1717 			goto cleanup_queue;
1718 	}
1719 
1720 	/*
1721 	 * Unconditionally increment this counter, regardless of the queue's
1722 	 * type or whether the queue is active.
1723 	 */
1724 	dqm->total_queue_count++;
1725 
1726 	pr_debug("Total of %d queues are accountable so far\n",
1727 			dqm->total_queue_count);
1728 
1729 	dqm_unlock(dqm);
1730 	return retval;
1731 
1732 cleanup_queue:
1733 	qpd->queue_count--;
1734 	list_del(&q->list);
1735 	if (q->properties.is_active)
1736 		decrement_queue_count(dqm, qpd, q);
1737 	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1738 	dqm_unlock(dqm);
1739 out_deallocate_doorbell:
1740 	deallocate_doorbell(qpd, q);
1741 out_deallocate_sdma_queue:
1742 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1743 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1744 		dqm_lock(dqm);
1745 		deallocate_sdma_queue(dqm, q);
1746 		dqm_unlock(dqm);
1747 	}
1748 out:
1749 	return retval;
1750 }
1751 
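/* Poll the fence location, yielding the CPU between reads, until it
 * reaches fence_value or timeout_ms expires.
 */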
1752 int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
1753 				uint64_t fence_value,
1754 				unsigned int timeout_ms)
1755 {
1756 	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
1757 
1758 	while (*fence_addr != fence_value) {
1759 		if (time_after(jiffies, end_jiffies)) {
1760 			pr_err("qcm fence wait loop timeout expired\n");
1761 			/* In HWS case, this is used to halt the driver thread
1762 			 * in order not to mess up CP states before doing
1763 			 * scandumps for FW debugging.
1764 			 */
1765 			while (halt_if_hws_hang)
1766 				schedule();
1767 
1768 			return -ETIME;
1769 		}
1770 		schedule();
1771 	}
1772 
1773 	return 0;
1774 }
1775 
1776 /* dqm->lock mutex has to be locked before calling this function */
1777 static int map_queues_cpsch(struct device_queue_manager *dqm)
1778 {
1779 	int retval;
1780 
1781 	if (!dqm->sched_running)
1782 		return 0;
1783 	if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
1784 		return 0;
1785 	if (dqm->active_runlist)
1786 		return 0;
1787 
1788 	retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
1789 	pr_debug("%s sent runlist\n", __func__);
1790 	if (retval) {
1791 		pr_err("failed to execute runlist\n");
1792 		return retval;
1793 	}
1794 	dqm->active_runlist = true;
1795 
1796 	return retval;
1797 }
1798 
1799 /* dqm->lock mutex has to be locked before calling this function */
1800 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1801 				enum kfd_unmap_queues_filter filter,
1802 				uint32_t filter_param, bool reset)
1803 {
1804 	int retval = 0;
1805 	struct mqd_manager *mqd_mgr;
1806 
1807 	if (!dqm->sched_running)
1808 		return 0;
1809 	if (dqm->is_hws_hang || dqm->is_resetting)
1810 		return -EIO;
1811 	if (!dqm->active_runlist)
1812 		return retval;
1813 
1814 	retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
1815 	if (retval)
1816 		return retval;
1817 
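	/*
	 * Arm the fence and queue a query-status packet; the CP writes
	 * KFD_FENCE_COMPLETED to the fence address once the unmap above
	 * has been processed.
	 */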
1818 	*dqm->fence_addr = KFD_FENCE_INIT;
1819 	pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
1820 				KFD_FENCE_COMPLETED);
	/* Wait for the CP to signal the fence; a timeout here indicates
	 * that queue preemption did not complete in time.
	 */
1822 	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1823 				queue_preemption_timeout_ms);
1824 	if (retval) {
1825 		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
1826 		kfd_hws_hang(dqm);
1827 		return retval;
1828 	}
1829 
	/* In the current MEC firmware implementation, if a compute queue
	 * doesn't respond to the preemption request in time, the HIQ will
	 * abandon the unmap request without returning any timeout error
	 * to the driver. Instead, MEC firmware logs the doorbell of the
	 * unresponsive compute queue in the HIQ.MQD.queue_doorbell_id
	 * fields. To make sure the queue unmap was successful, the driver
	 * needs to check those fields.
	 */
1838 	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
1839 	if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
1840 		pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
1841 		while (halt_if_hws_hang)
1842 			schedule();
1843 		return -ETIME;
1844 	}
1845 
1846 	pm_release_ib(&dqm->packet_mgr);
1847 	dqm->active_runlist = false;
1848 
1849 	return retval;
1850 }
1851 
/* Only for compute queues. */
1853 static int reset_queues_cpsch(struct device_queue_manager *dqm,
1854 			uint16_t pasid)
1855 {
1856 	int retval;
1857 
1858 	dqm_lock(dqm);
1859 
1860 	retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
1861 			pasid, true);
1862 
1863 	dqm_unlock(dqm);
1864 	return retval;
1865 }
1866 
1867 /* dqm->lock mutex has to be locked before calling this function */
1868 static int execute_queues_cpsch(struct device_queue_manager *dqm,
1869 				enum kfd_unmap_queues_filter filter,
1870 				uint32_t filter_param)
1871 {
1872 	int retval;
1873 
1874 	if (dqm->is_hws_hang)
1875 		return -EIO;
1876 	retval = unmap_queues_cpsch(dqm, filter, filter_param, false);
1877 	if (retval)
1878 		return retval;
1879 
1880 	return map_queues_cpsch(dqm);
1881 }
1882 
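/*
 * Destroy a user queue under CP scheduling: preempt it via the HWS (or
 * remove it via MES), release its doorbell and SDMA resources, and update
 * the queue accounting. The MQD is freed outside the DQM lock.
 */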
1883 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1884 				struct qcm_process_device *qpd,
1885 				struct queue *q)
1886 {
1887 	int retval;
1888 	struct mqd_manager *mqd_mgr;
1889 	uint64_t sdma_val = 0;
1890 	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
1891 
1892 	/* Get the SDMA queue stats */
1893 	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1894 	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1895 		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
1896 							&sdma_val);
1897 		if (retval)
1898 			pr_err("Failed to read SDMA queue counter for queue: %d\n",
1899 				q->properties.queue_id);
1900 	}
1901 
1902 	retval = 0;
1903 
1904 	/* remove queue from list to prevent rescheduling after preemption */
1905 	dqm_lock(dqm);
1906 
1907 	if (qpd->is_debug) {
		/*
		 * Error: destroying a queue of a process that is currently
		 * being debugged is not allowed.
		 */
1912 		retval = -EBUSY;
1913 		goto failed_try_destroy_debugged_queue;
1914 
1915 	}
1916 
1917 	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1918 			q->properties.type)];
1919 
1920 	deallocate_doorbell(qpd, q);
1921 
1922 	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1923 	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1924 		deallocate_sdma_queue(dqm, q);
1925 		pdd->sdma_past_activity_counter += sdma_val;
1926 	}
1927 
1928 	list_del(&q->list);
1929 	qpd->queue_count--;
1930 	if (q->properties.is_active) {
1931 		if (!dqm->dev->kfd->shared_resources.enable_mes) {
1932 			decrement_queue_count(dqm, qpd, q);
1933 			retval = execute_queues_cpsch(dqm,
1934 						      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1935 			if (retval == -ETIME)
1936 				qpd->reset_wavefronts = true;
1937 		} else {
1938 			retval = remove_queue_mes(dqm, q, qpd);
1939 		}
1940 	}
1941 
1942 	/*
1943 	 * Unconditionally decrement this counter, regardless of the queue's
1944 	 * type
1945 	 */
1946 	dqm->total_queue_count--;
1947 	pr_debug("Total of %d queues are accountable so far\n",
1948 			dqm->total_queue_count);
1949 
1950 	dqm_unlock(dqm);
1951 
1952 	/* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
1953 	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1954 
1955 	return retval;
1956 
1957 failed_try_destroy_debugged_queue:
1958 
1959 	dqm_unlock(dqm);
1960 	return retval;
1961 }
1962 
1963 /*
1964  * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
1965  * stay in user mode.
1966  */
1967 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
1968 /* APE1 limit is inclusive and 64K aligned. */
1969 #define APE1_LIMIT_ALIGNMENT 0xFFFF
1970 
1971 static bool set_cache_memory_policy(struct device_queue_manager *dqm,
1972 				   struct qcm_process_device *qpd,
1973 				   enum cache_policy default_policy,
1974 				   enum cache_policy alternate_policy,
1975 				   void __user *alternate_aperture_base,
1976 				   uint64_t alternate_aperture_size)
1977 {
1978 	bool retval = true;
1979 
1980 	if (!dqm->asic_ops.set_cache_memory_policy)
1981 		return retval;
1982 
1983 	dqm_lock(dqm);
1984 
1985 	if (alternate_aperture_size == 0) {
1986 		/* base > limit disables APE1 */
1987 		qpd->sh_mem_ape1_base = 1;
1988 		qpd->sh_mem_ape1_limit = 0;
1989 	} else {
1990 		/*
1991 		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
1992 		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
1993 		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
1994 		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
1995 		 * Verify that the base and size parameters can be
1996 		 * represented in this format and convert them.
1997 		 * Additionally restrict APE1 to user-mode addresses.
1998 		 */
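		/*
		 * For example, a 64 KiB aperture at base 0x100000000 has
		 * limit 0x10000ffff; both pass the fixed-bits checks below
		 * and program SH_MEM_APE1_BASE = SH_MEM_APE1_LIMIT = 0x10000.
		 */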
1999 
2000 		uint64_t base = (uintptr_t)alternate_aperture_base;
2001 		uint64_t limit = base + alternate_aperture_size - 1;
2002 
2003 		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
2004 		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
2005 			retval = false;
2006 			goto out;
2007 		}
2008 
2009 		qpd->sh_mem_ape1_base = base >> 16;
2010 		qpd->sh_mem_ape1_limit = limit >> 16;
2011 	}
2012 
2013 	retval = dqm->asic_ops.set_cache_memory_policy(
2014 			dqm,
2015 			qpd,
2016 			default_policy,
2017 			alternate_policy,
2018 			alternate_aperture_base,
2019 			alternate_aperture_size);
2020 
2021 	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
2022 		program_sh_mem_settings(dqm, qpd);
2023 
2024 	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
2025 		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
2026 		qpd->sh_mem_ape1_limit);
2027 
2028 out:
2029 	dqm_unlock(dqm);
2030 	return retval;
2031 }
2032 
2033 static int process_termination_nocpsch(struct device_queue_manager *dqm,
2034 		struct qcm_process_device *qpd)
2035 {
2036 	struct queue *q;
2037 	struct device_process_node *cur, *next_dpn;
2038 	int retval = 0;
2039 	bool found = false;
2040 
2041 	dqm_lock(dqm);
2042 
2043 	/* Clear all user mode queues */
2044 	while (!list_empty(&qpd->queues_list)) {
2045 		struct mqd_manager *mqd_mgr;
2046 		int ret;
2047 
2048 		q = list_first_entry(&qpd->queues_list, struct queue, list);
2049 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
2050 				q->properties.type)];
2051 		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
2052 		if (ret)
2053 			retval = ret;
2054 		dqm_unlock(dqm);
2055 		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
2056 		dqm_lock(dqm);
2057 	}
2058 
2059 	/* Unregister process */
2060 	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
2061 		if (qpd == cur->qpd) {
2062 			list_del(&cur->list);
2063 			kfree(cur);
2064 			dqm->processes_count--;
2065 			found = true;
2066 			break;
2067 		}
2068 	}
2069 
2070 	dqm_unlock(dqm);
2071 
2072 	/* Outside the DQM lock because under the DQM lock we can't do
2073 	 * reclaim or take other locks that others hold while reclaiming.
2074 	 */
2075 	if (found)
2076 		kfd_dec_compute_active(dqm->dev);
2077 
2078 	return retval;
2079 }
2080 
2081 static int get_wave_state(struct device_queue_manager *dqm,
2082 			  struct queue *q,
2083 			  void __user *ctl_stack,
2084 			  u32 *ctl_stack_used_size,
2085 			  u32 *save_area_used_size)
2086 {
2087 	struct mqd_manager *mqd_mgr;
2088 
2089 	dqm_lock(dqm);
2090 
2091 	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
2092 
2093 	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
2094 	    q->properties.is_active || !q->device->kfd->cwsr_enabled ||
2095 	    !mqd_mgr->get_wave_state) {
2096 		dqm_unlock(dqm);
2097 		return -EINVAL;
2098 	}
2099 
2100 	dqm_unlock(dqm);
2101 
	/*
	 * get_wave_state is called outside the dqm lock to prevent circular
	 * locking, and the queue is protected against destruction by the
	 * process lock.
	 */
2107 	return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties,
2108 			ctl_stack, ctl_stack_used_size, save_area_used_size);
2109 }
2110 
2111 static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
2112 			const struct queue *q,
2113 			u32 *mqd_size,
2114 			u32 *ctl_stack_size)
2115 {
2116 	struct mqd_manager *mqd_mgr;
2117 	enum KFD_MQD_TYPE mqd_type =
2118 			get_mqd_type_from_queue_type(q->properties.type);
2119 
2120 	dqm_lock(dqm);
2121 	mqd_mgr = dqm->mqd_mgrs[mqd_type];
2122 	*mqd_size = mqd_mgr->mqd_size;
2123 	*ctl_stack_size = 0;
2124 
2125 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
2126 		mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
2127 
2128 	dqm_unlock(dqm);
2129 }
2130 
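/*
 * Snapshot the queue's MQD and control stack into caller-provided buffers.
 * The queue must be inactive and CWSR must be enabled.
 */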
2131 static int checkpoint_mqd(struct device_queue_manager *dqm,
2132 			  const struct queue *q,
2133 			  void *mqd,
2134 			  void *ctl_stack)
2135 {
2136 	struct mqd_manager *mqd_mgr;
2137 	int r = 0;
2138 	enum KFD_MQD_TYPE mqd_type =
2139 			get_mqd_type_from_queue_type(q->properties.type);
2140 
2141 	dqm_lock(dqm);
2142 
2143 	if (q->properties.is_active || !q->device->kfd->cwsr_enabled) {
2144 		r = -EINVAL;
2145 		goto dqm_unlock;
2146 	}
2147 
2148 	mqd_mgr = dqm->mqd_mgrs[mqd_type];
2149 	if (!mqd_mgr->checkpoint_mqd) {
2150 		r = -EOPNOTSUPP;
2151 		goto dqm_unlock;
2152 	}
2153 
2154 	mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);
2155 
2156 dqm_unlock:
2157 	dqm_unlock(dqm);
2158 	return r;
2159 }
2160 
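/*
 * Tear down all kernel and user mode queues of a process under CP
 * scheduling, unregister the process from the DQM, and free the MQD
 * resources outside the DQM lock.
 */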
2161 static int process_termination_cpsch(struct device_queue_manager *dqm,
2162 		struct qcm_process_device *qpd)
2163 {
2164 	int retval;
2165 	struct queue *q;
2166 	struct kernel_queue *kq, *kq_next;
2167 	struct mqd_manager *mqd_mgr;
2168 	struct device_process_node *cur, *next_dpn;
2169 	enum kfd_unmap_queues_filter filter =
2170 		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
2171 	bool found = false;
2172 
2173 	retval = 0;
2174 
2175 	dqm_lock(dqm);
2176 
2177 	/* Clean all kernel queues */
2178 	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
2179 		list_del(&kq->list);
2180 		decrement_queue_count(dqm, qpd, kq->queue);
2181 		qpd->is_debug = false;
2182 		dqm->total_queue_count--;
2183 		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
2184 	}
2185 
2186 	/* Clear all user mode queues */
2187 	list_for_each_entry(q, &qpd->queues_list, list) {
2188 		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
2189 			deallocate_sdma_queue(dqm, q);
2190 		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
2191 			deallocate_sdma_queue(dqm, q);
2192 
2193 		if (q->properties.is_active) {
2194 			decrement_queue_count(dqm, qpd, q);
2195 
2196 			if (dqm->dev->kfd->shared_resources.enable_mes) {
2197 				retval = remove_queue_mes(dqm, q, qpd);
2198 				if (retval)
2199 					pr_err("Failed to remove queue %d\n",
2200 						q->properties.queue_id);
2201 			}
2202 		}
2203 
2204 		dqm->total_queue_count--;
2205 	}
2206 
2207 	/* Unregister process */
2208 	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
2209 		if (qpd == cur->qpd) {
2210 			list_del(&cur->list);
2211 			kfree(cur);
2212 			dqm->processes_count--;
2213 			found = true;
2214 			break;
2215 		}
2216 	}
2217 
2218 	if (!dqm->dev->kfd->shared_resources.enable_mes)
2219 		retval = execute_queues_cpsch(dqm, filter, 0);
2220 
2221 	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
2222 		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
2223 		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
2224 		qpd->reset_wavefronts = false;
2225 	}
2226 
2227 	/* Lastly, free mqd resources.
2228 	 * Do free_mqd() after dqm_unlock to avoid circular locking.
2229 	 */
2230 	while (!list_empty(&qpd->queues_list)) {
2231 		q = list_first_entry(&qpd->queues_list, struct queue, list);
2232 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
2233 				q->properties.type)];
2234 		list_del(&q->list);
2235 		qpd->queue_count--;
2236 		dqm_unlock(dqm);
2237 		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
2238 		dqm_lock(dqm);
2239 	}
2240 	dqm_unlock(dqm);
2241 
2242 	/* Outside the DQM lock because under the DQM lock we can't do
2243 	 * reclaim or take other locks that others hold while reclaiming.
2244 	 */
2245 	if (found)
2246 		kfd_dec_compute_active(dqm->dev);
2247 
2248 	return retval;
2249 }
2250 
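/* Create one MQD manager per MQD type; on failure, free those already created. */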
2251 static int init_mqd_managers(struct device_queue_manager *dqm)
2252 {
2253 	int i, j;
2254 	struct mqd_manager *mqd_mgr;
2255 
2256 	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
2257 		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
2258 		if (!mqd_mgr) {
2259 			pr_err("mqd manager [%d] initialization failed\n", i);
2260 			goto out_free;
2261 		}
2262 		dqm->mqd_mgrs[i] = mqd_mgr;
2263 	}
2264 
2265 	return 0;
2266 
2267 out_free:
2268 	for (j = 0; j < i; j++) {
2269 		kfree(dqm->mqd_mgrs[j]);
2270 		dqm->mqd_mgrs[j] = NULL;
2271 	}
2272 
2273 	return -ENOMEM;
2274 }
2275 
/* Allocate one HIQ MQD (HWS) and all SDMA MQDs in one contiguous chunk. */
2277 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
2278 {
2279 	int retval;
2280 	struct kfd_node *dev = dqm->dev;
2281 	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
2282 	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
2283 		get_num_all_sdma_engines(dqm) *
2284 		dev->kfd->device_info.num_sdma_queues_per_engine +
2285 		(dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
2286 		dqm->dev->num_xcc_per_node);
2287 
2288 	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
2289 		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
2290 		(void *)&(mem_obj->cpu_ptr), false);
2291 
2292 	return retval;
2293 }
2294 
2295 struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
2296 {
2297 	struct device_queue_manager *dqm;
2298 
2299 	pr_debug("Loading device queue manager\n");
2300 
2301 	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
2302 	if (!dqm)
2303 		return NULL;
2304 
2305 	switch (dev->adev->asic_type) {
2306 	/* HWS is not available on Hawaii. */
2307 	case CHIP_HAWAII:
2308 	/* HWS depends on CWSR for timely dequeue. CWSR is not
2309 	 * available on Tonga.
2310 	 *
2311 	 * FIXME: This argument also applies to Kaveri.
2312 	 */
2313 	case CHIP_TONGA:
2314 		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
2315 		break;
2316 	default:
2317 		dqm->sched_policy = sched_policy;
2318 		break;
2319 	}
2320 
2321 	dqm->dev = dev;
2322 	switch (dqm->sched_policy) {
2323 	case KFD_SCHED_POLICY_HWS:
2324 	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
2325 		/* initialize dqm for cp scheduling */
2326 		dqm->ops.create_queue = create_queue_cpsch;
2327 		dqm->ops.initialize = initialize_cpsch;
2328 		dqm->ops.start = start_cpsch;
2329 		dqm->ops.stop = stop_cpsch;
2330 		dqm->ops.pre_reset = pre_reset;
2331 		dqm->ops.destroy_queue = destroy_queue_cpsch;
2332 		dqm->ops.update_queue = update_queue;
2333 		dqm->ops.register_process = register_process;
2334 		dqm->ops.unregister_process = unregister_process;
2335 		dqm->ops.uninitialize = uninitialize;
2336 		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
2337 		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
2338 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
2339 		dqm->ops.process_termination = process_termination_cpsch;
2340 		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
2341 		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
2342 		dqm->ops.get_wave_state = get_wave_state;
2343 		dqm->ops.reset_queues = reset_queues_cpsch;
2344 		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
2345 		dqm->ops.checkpoint_mqd = checkpoint_mqd;
2346 		break;
2347 	case KFD_SCHED_POLICY_NO_HWS:
2348 		/* initialize dqm for no cp scheduling */
2349 		dqm->ops.start = start_nocpsch;
2350 		dqm->ops.stop = stop_nocpsch;
2351 		dqm->ops.pre_reset = pre_reset;
2352 		dqm->ops.create_queue = create_queue_nocpsch;
2353 		dqm->ops.destroy_queue = destroy_queue_nocpsch;
2354 		dqm->ops.update_queue = update_queue;
2355 		dqm->ops.register_process = register_process;
2356 		dqm->ops.unregister_process = unregister_process;
2357 		dqm->ops.initialize = initialize_nocpsch;
2358 		dqm->ops.uninitialize = uninitialize;
2359 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
2360 		dqm->ops.process_termination = process_termination_nocpsch;
2361 		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
2362 		dqm->ops.restore_process_queues =
2363 			restore_process_queues_nocpsch;
2364 		dqm->ops.get_wave_state = get_wave_state;
2365 		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
2366 		dqm->ops.checkpoint_mqd = checkpoint_mqd;
2367 		break;
2368 	default:
2369 		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
2370 		goto out_free;
2371 	}
2372 
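	/* Hook up ASIC-specific callbacks based on GPU family or GC IP version. */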
2373 	switch (dev->adev->asic_type) {
2374 	case CHIP_CARRIZO:
2375 		device_queue_manager_init_vi(&dqm->asic_ops);
2376 		break;
2377 
2378 	case CHIP_KAVERI:
2379 		device_queue_manager_init_cik(&dqm->asic_ops);
2380 		break;
2381 
2382 	case CHIP_HAWAII:
2383 		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
2384 		break;
2385 
2386 	case CHIP_TONGA:
2387 	case CHIP_FIJI:
2388 	case CHIP_POLARIS10:
2389 	case CHIP_POLARIS11:
2390 	case CHIP_POLARIS12:
2391 	case CHIP_VEGAM:
2392 		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
2393 		break;
2394 
2395 	default:
2396 		if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
2397 			device_queue_manager_init_v11(&dqm->asic_ops);
2398 		else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
2399 			device_queue_manager_init_v10_navi10(&dqm->asic_ops);
2400 		else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
2401 			device_queue_manager_init_v9(&dqm->asic_ops);
2402 		else {
2403 			WARN(1, "Unexpected ASIC family %u",
2404 			     dev->adev->asic_type);
2405 			goto out_free;
2406 		}
2407 	}
2408 
2409 	if (init_mqd_managers(dqm))
2410 		goto out_free;
2411 
2412 	if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
2413 		pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
2414 		goto out_free;
2415 	}
2416 
2417 	if (!dqm->ops.initialize(dqm))
2418 		return dqm;
2419 
2420 out_free:
2421 	kfree(dqm);
2422 	return NULL;
2423 }
2424 
2425 static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
2426 				    struct kfd_mem_obj *mqd)
2427 {
2428 	WARN(!mqd, "No hiq sdma mqd trunk to free");
2429 
2430 	amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
2431 }
2432 
2433 void device_queue_manager_uninit(struct device_queue_manager *dqm)
2434 {
2435 	dqm->ops.stop(dqm);
2436 	dqm->ops.uninitialize(dqm);
2437 	if (!dqm->dev->kfd->shared_resources.enable_mes)
2438 		deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
2439 	kfree(dqm);
2440 }
2441 
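/* Evict all queues of the process identified by @pasid from this device. */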
2442 int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
2443 {
2444 	struct kfd_process_device *pdd;
2445 	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
2446 	int ret = 0;
2447 
2448 	if (!p)
2449 		return -EINVAL;
2450 	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
2451 	pdd = kfd_get_process_device_data(dqm->dev, p);
2452 	if (pdd)
2453 		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
2454 	kfd_unref_process(p);
2455 
2456 	return ret;
2457 }
2458 
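/* Worker that responds to an unrecoverable HW exception by resetting the GPU. */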
2459 static void kfd_process_hw_exception(struct work_struct *work)
2460 {
2461 	struct device_queue_manager *dqm = container_of(work,
2462 			struct device_queue_manager, hw_exception_work);
2463 	amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
2464 }
2465 
2466 #if defined(CONFIG_DEBUG_FS)
2467 
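/*
 * Print register dumps eight values per line, starting a new line whenever
 * the register offsets are not consecutive.
 */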
2468 static void seq_reg_dump(struct seq_file *m,
2469 			 uint32_t (*dump)[2], uint32_t n_regs)
2470 {
2471 	uint32_t i, count;
2472 
2473 	for (i = 0, count = 0; i < n_regs; i++) {
2474 		if (count == 0 ||
2475 		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
2476 			seq_printf(m, "%s    %08x: %08x",
2477 				   i ? "\n" : "",
2478 				   dump[i][0], dump[i][1]);
2479 			count = 7;
2480 		} else {
2481 			seq_printf(m, " %08x", dump[i][1]);
2482 			count--;
2483 		}
2484 	}
2485 
2486 	seq_puts(m, "\n");
2487 }
2488 
2489 int dqm_debugfs_hqds(struct seq_file *m, void *data)
2490 {
2491 	struct device_queue_manager *dqm = data;
2492 	uint32_t (*dump)[2], n_regs;
2493 	int pipe, queue;
2494 	int r = 0, xcc;
2495 	uint32_t inst;
2496 	uint32_t sdma_engine_start;
2497 
2498 	if (!dqm->sched_running) {
2499 		seq_puts(m, " Device is stopped\n");
2500 		return 0;
2501 	}
2502 
2503 	for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++) {
2504 		inst = dqm->dev->start_xcc_id + xcc;
2505 		r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
2506 					KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
2507 					&dump, &n_regs, inst);
2508 		if (!r) {
2509 			seq_printf(m,
2510 				"   Inst %d, HIQ on MEC %d Pipe %d Queue %d\n",
2511 				inst, KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
2512 				KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
2513 				KFD_CIK_HIQ_QUEUE);
2514 			seq_reg_dump(m, dump, n_regs);
2515 
2516 			kfree(dump);
2517 		}
2518 
2519 		for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
2520 			int pipe_offset = pipe * get_queues_per_pipe(dqm);
2521 
2522 			for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
2523 				if (!test_bit(pipe_offset + queue,
2524 				      dqm->dev->kfd->shared_resources.cp_queue_bitmap))
2525 					continue;
2526 
2527 				r = dqm->dev->kfd2kgd->hqd_dump(
2528 					dqm->dev->adev, pipe, queue, &dump, &n_regs, inst);
2529 				if (r)
2530 					break;
2531 
2532 				seq_printf(m, " Inst %d,  CP Pipe %d, Queue %d\n",
2533 					  inst, pipe, queue);
2534 				seq_reg_dump(m, dump, n_regs);
2535 
2536 				kfree(dump);
2537 			}
2538 		}
2539 	}
2540 
2541 	sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
2542 	for (pipe = sdma_engine_start;
2543 	     pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm));
2544 	     pipe++) {
2545 		for (queue = 0;
2546 		     queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
2547 		     queue++) {
2548 			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
2549 				dqm->dev->adev, pipe, queue, &dump, &n_regs);
2550 			if (r)
2551 				break;
2552 
2553 			seq_printf(m, "  SDMA Engine %d, RLC %d\n",
2554 				  pipe, queue);
2555 			seq_reg_dump(m, dump, n_regs);
2556 
2557 			kfree(dump);
2558 		}
2559 	}
2560 
2561 	return r;
2562 }
2563 
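/*
 * Debugfs helper that deliberately provokes an HWS hang for testing.
 * Marking the runlist active forces execute_queues_cpsch through the
 * unmap path.
 */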
2564 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
2565 {
2566 	int r = 0;
2567 
2568 	dqm_lock(dqm);
2569 	r = pm_debugfs_hang_hws(&dqm->packet_mgr);
2570 	if (r) {
2571 		dqm_unlock(dqm);
2572 		return r;
2573 	}
2574 	dqm->active_runlist = true;
2575 	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
2576 	dqm_unlock(dqm);
2577 
2578 	return r;
2579 }
2580 
2581 #endif
2582