xref: /openbmc/linux/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c (revision 0af5cb349a2c97fbabb3cede96efcde9d54b7940)
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2014-2022 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include <linux/ratelimit.h>
26 #include <linux/printk.h>
27 #include <linux/slab.h>
28 #include <linux/list.h>
29 #include <linux/types.h>
30 #include <linux/bitops.h>
31 #include <linux/sched.h>
32 #include "kfd_priv.h"
33 #include "kfd_device_queue_manager.h"
34 #include "kfd_mqd_manager.h"
35 #include "cik_regs.h"
36 #include "kfd_kernel_queue.h"
37 #include "amdgpu_amdkfd.h"
38 #include "mes_api_def.h"
39 
40 /* Size of the per-pipe EOP queue */
41 #define CIK_HPD_EOP_BYTES_LOG2 11
42 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
43 
44 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
45 				  u32 pasid, unsigned int vmid);
46 
47 static int execute_queues_cpsch(struct device_queue_manager *dqm,
48 				enum kfd_unmap_queues_filter filter,
49 				uint32_t filter_param);
50 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
51 				enum kfd_unmap_queues_filter filter,
52 				uint32_t filter_param, bool reset);
53 
54 static int map_queues_cpsch(struct device_queue_manager *dqm);
55 
56 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
57 				struct queue *q);
58 
59 static inline void deallocate_hqd(struct device_queue_manager *dqm,
60 				struct queue *q);
61 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
62 static int allocate_sdma_queue(struct device_queue_manager *dqm,
63 				struct queue *q, const uint32_t *restore_sdma_id);
64 static void kfd_process_hw_exception(struct work_struct *work);
65 
66 static inline
67 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
68 {
69 	if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
70 		return KFD_MQD_TYPE_SDMA;
71 	return KFD_MQD_TYPE_CP;
72 }
73 
74 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
75 {
76 	int i;
77 	int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
78 		+ pipe) * dqm->dev->shared_resources.num_queue_per_pipe;
79 
80 	/* queue is available for KFD usage if bit is 1 */
81 	for (i = 0; i <  dqm->dev->shared_resources.num_queue_per_pipe; ++i)
82 		if (test_bit(pipe_offset + i,
83 			      dqm->dev->shared_resources.cp_queue_bitmap))
84 			return true;
85 	return false;
86 }
87 
88 unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
89 {
90 	return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap,
91 				KGD_MAX_QUEUES);
92 }
93 
94 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
95 {
96 	return dqm->dev->shared_resources.num_queue_per_pipe;
97 }
98 
99 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
100 {
101 	return dqm->dev->shared_resources.num_pipe_per_mec;
102 }
103 
104 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
105 {
106 	return kfd_get_num_sdma_engines(dqm->dev) +
107 		kfd_get_num_xgmi_sdma_engines(dqm->dev);
108 }
109 
110 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
111 {
112 	return kfd_get_num_sdma_engines(dqm->dev) *
113 		dqm->dev->device_info.num_sdma_queues_per_engine;
114 }
115 
116 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
117 {
118 	return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
119 		dqm->dev->device_info.num_sdma_queues_per_engine;
120 }
121 
122 static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm)
123 {
124 	return dqm->dev->device_info.reserved_sdma_queues_bitmap;
125 }
126 
127 void program_sh_mem_settings(struct device_queue_manager *dqm,
128 					struct qcm_process_device *qpd)
129 {
130 	return dqm->dev->kfd2kgd->program_sh_mem_settings(
131 						dqm->dev->adev, qpd->vmid,
132 						qpd->sh_mem_config,
133 						qpd->sh_mem_ape1_base,
134 						qpd->sh_mem_ape1_limit,
135 						qpd->sh_mem_bases);
136 }
137 
138 static void kfd_hws_hang(struct device_queue_manager *dqm)
139 {
140 	/*
141 	 * Issue a GPU reset if HWS is unresponsive
142 	 */
143 	dqm->is_hws_hang = true;
144 
145 	/* It's possible we're detecting a HWS hang in the
146 	 * middle of a GPU reset. No need to schedule another
147 	 * reset in this case.
148 	 */
149 	if (!dqm->is_resetting)
150 		schedule_work(&dqm->hw_exception_work);
151 }
152 
153 static int convert_to_mes_queue_type(int queue_type)
154 {
155 	int mes_queue_type;
156 
157 	switch (queue_type) {
158 	case KFD_QUEUE_TYPE_COMPUTE:
159 		mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
160 		break;
161 	case KFD_QUEUE_TYPE_SDMA:
162 		mes_queue_type = MES_QUEUE_TYPE_SDMA;
163 		break;
164 	default:
165 		WARN(1, "Invalid queue type %d", queue_type);
166 		mes_queue_type = -EINVAL;
167 		break;
168 	}
169 
170 	return mes_queue_type;
171 }
172 
173 static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
174 			 struct qcm_process_device *qpd)
175 {
176 	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
177 	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
178 	struct mes_add_queue_input queue_input;
179 	int r, queue_type;
180 	uint64_t wptr_addr_off;
181 
182 	if (dqm->is_hws_hang)
183 		return -EIO;
184 
185 	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
186 	queue_input.process_id = qpd->pqm->process->pasid;
187 	queue_input.page_table_base_addr =  qpd->page_table_base;
188 	queue_input.process_va_start = 0;
189 	queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
190 	/* MES unit for quantum is 100ns */
191 	queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM;  /* Equivalent to 10ms. */
192 	queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
193 	queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
194 	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
195 	queue_input.inprocess_gang_priority = q->properties.priority;
196 	queue_input.gang_global_priority_level =
197 					AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
198 	queue_input.doorbell_offset = q->properties.doorbell_off;
199 	queue_input.mqd_addr = q->gart_mqd_addr;
200 	queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
201 
202 	if (q->wptr_bo) {
203 		wptr_addr_off = (uint64_t)q->properties.write_ptr - (uint64_t)q->wptr_bo->kfd_bo->va;
204 		queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
205 	}
206 
207 	queue_input.is_kfd_process = 1;
208 
209 	queue_input.paging = false;
210 	queue_input.tba_addr = qpd->tba_addr;
211 	queue_input.tma_addr = qpd->tma_addr;
212 
213 	queue_type = convert_to_mes_queue_type(q->properties.type);
214 	if (queue_type < 0) {
215 		pr_err("Queue type not supported with MES, queue:%d\n",
216 				q->properties.type);
217 		return -EINVAL;
218 	}
219 	queue_input.queue_type = (uint32_t)queue_type;
220 
221 	if (q->gws) {
222 		queue_input.gws_base = 0;
223 		queue_input.gws_size = qpd->num_gws;
224 	}
225 
226 	amdgpu_mes_lock(&adev->mes);
227 	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
228 	amdgpu_mes_unlock(&adev->mes);
229 	if (r) {
230 		pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
231 			q->properties.doorbell_off);
232 		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
233 		kfd_hws_hang(dqm);
234 }
235 
236 	return r;
237 }
238 
239 static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
240 			struct qcm_process_device *qpd)
241 {
242 	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
243 	int r;
244 	struct mes_remove_queue_input queue_input;
245 
246 	if (dqm->is_hws_hang)
247 		return -EIO;
248 
249 	memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
250 	queue_input.doorbell_offset = q->properties.doorbell_off;
251 	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
252 
253 	amdgpu_mes_lock(&adev->mes);
254 	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
255 	amdgpu_mes_unlock(&adev->mes);
256 
257 	if (r) {
258 		pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
259 			q->properties.doorbell_off);
260 		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
261 		kfd_hws_hang(dqm);
262 	}
263 
264 	return r;
265 }
266 
267 static int remove_all_queues_mes(struct device_queue_manager *dqm)
268 {
269 	struct device_process_node *cur;
270 	struct qcm_process_device *qpd;
271 	struct queue *q;
272 	int retval = 0;
273 
274 	list_for_each_entry(cur, &dqm->queues, list) {
275 		qpd = cur->qpd;
276 		list_for_each_entry(q, &qpd->queues_list, list) {
277 			if (q->properties.is_active) {
278 				retval = remove_queue_mes(dqm, q, qpd);
279 				if (retval) {
280 					pr_err("%s: Failed to remove queue %d for dev %d",
281 						__func__,
282 						q->properties.queue_id,
283 						dqm->dev->id);
284 					return retval;
285 				}
286 			}
287 		}
288 	}
289 
290 	return retval;
291 }
292 
293 static void increment_queue_count(struct device_queue_manager *dqm,
294 				  struct qcm_process_device *qpd,
295 				  struct queue *q)
296 {
297 	dqm->active_queue_count++;
298 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
299 	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
300 		dqm->active_cp_queue_count++;
301 
302 	if (q->properties.is_gws) {
303 		dqm->gws_queue_count++;
304 		qpd->mapped_gws_queue = true;
305 	}
306 }
307 
308 static void decrement_queue_count(struct device_queue_manager *dqm,
309 				  struct qcm_process_device *qpd,
310 				  struct queue *q)
311 {
312 	dqm->active_queue_count--;
313 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
314 	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
315 		dqm->active_cp_queue_count--;
316 
317 	if (q->properties.is_gws) {
318 		dqm->gws_queue_count--;
319 		qpd->mapped_gws_queue = false;
320 	}
321 }
322 
323 /*
324  * Allocate a doorbell ID to this queue.
325  * If doorbell_id is passed in, make sure requested ID is valid then allocate it.
326  */
327 static int allocate_doorbell(struct qcm_process_device *qpd,
328 			     struct queue *q,
329 			     uint32_t const *restore_id)
330 {
331 	struct kfd_dev *dev = qpd->dqm->dev;
332 
333 	if (!KFD_IS_SOC15(dev)) {
334 		/* On pre-SOC15 chips we need to use the queue ID to
335 		 * preserve the user mode ABI.
336 		 */
337 
338 		if (restore_id && *restore_id != q->properties.queue_id)
339 			return -EINVAL;
340 
341 		q->doorbell_id = q->properties.queue_id;
342 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
343 			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
344 		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
345 		 * doorbell assignments based on the engine and queue id.
346 		 * The doobell index distance between RLC (2*i) and (2*i+1)
347 		 * for a SDMA engine is 512.
348 		 */
349 
350 		uint32_t *idx_offset = dev->shared_resources.sdma_doorbell_idx;
351 		uint32_t valid_id = idx_offset[q->properties.sdma_engine_id]
352 						+ (q->properties.sdma_queue_id & 1)
353 						* KFD_QUEUE_DOORBELL_MIRROR_OFFSET
354 						+ (q->properties.sdma_queue_id >> 1);
355 
356 		if (restore_id && *restore_id != valid_id)
357 			return -EINVAL;
358 		q->doorbell_id = valid_id;
359 	} else {
360 		/* For CP queues on SOC15 */
361 		if (restore_id) {
362 			/* make sure that ID is free  */
363 			if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
364 				return -EINVAL;
365 
366 			q->doorbell_id = *restore_id;
367 		} else {
368 			/* or reserve a free doorbell ID */
369 			unsigned int found;
370 
371 			found = find_first_zero_bit(qpd->doorbell_bitmap,
372 						KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
373 			if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
374 				pr_debug("No doorbells available");
375 				return -EBUSY;
376 			}
377 			set_bit(found, qpd->doorbell_bitmap);
378 			q->doorbell_id = found;
379 		}
380 	}
381 
382 	q->properties.doorbell_off =
383 		kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
384 					  q->doorbell_id);
385 	return 0;
386 }
387 
388 static void deallocate_doorbell(struct qcm_process_device *qpd,
389 				struct queue *q)
390 {
391 	unsigned int old;
392 	struct kfd_dev *dev = qpd->dqm->dev;
393 
394 	if (!KFD_IS_SOC15(dev) ||
395 	    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
396 	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
397 		return;
398 
399 	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
400 	WARN_ON(!old);
401 }
402 
403 static void program_trap_handler_settings(struct device_queue_manager *dqm,
404 				struct qcm_process_device *qpd)
405 {
406 	if (dqm->dev->kfd2kgd->program_trap_handler_settings)
407 		dqm->dev->kfd2kgd->program_trap_handler_settings(
408 						dqm->dev->adev, qpd->vmid,
409 						qpd->tba_addr, qpd->tma_addr);
410 }
411 
412 static int allocate_vmid(struct device_queue_manager *dqm,
413 			struct qcm_process_device *qpd,
414 			struct queue *q)
415 {
416 	int allocated_vmid = -1, i;
417 
418 	for (i = dqm->dev->vm_info.first_vmid_kfd;
419 			i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
420 		if (!dqm->vmid_pasid[i]) {
421 			allocated_vmid = i;
422 			break;
423 		}
424 	}
425 
426 	if (allocated_vmid < 0) {
427 		pr_err("no more vmid to allocate\n");
428 		return -ENOSPC;
429 	}
430 
431 	pr_debug("vmid allocated: %d\n", allocated_vmid);
432 
433 	dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
434 
435 	set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
436 
437 	qpd->vmid = allocated_vmid;
438 	q->properties.vmid = allocated_vmid;
439 
440 	program_sh_mem_settings(dqm, qpd);
441 
442 	if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled)
443 		program_trap_handler_settings(dqm, qpd);
444 
445 	/* qpd->page_table_base is set earlier when register_process()
446 	 * is called, i.e. when the first queue is created.
447 	 */
448 	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
449 			qpd->vmid,
450 			qpd->page_table_base);
451 	/* invalidate the VM context after pasid and vmid mapping is set up */
452 	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
453 
454 	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
455 		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
456 				qpd->sh_hidden_private_base, qpd->vmid);
457 
458 	return 0;
459 }
460 
461 static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
462 				struct qcm_process_device *qpd)
463 {
464 	const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
465 	int ret;
466 
467 	if (!qpd->ib_kaddr)
468 		return -ENOMEM;
469 
470 	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
471 	if (ret)
472 		return ret;
473 
474 	return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
475 				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
476 				pmf->release_mem_size / sizeof(uint32_t));
477 }
478 
479 static void deallocate_vmid(struct device_queue_manager *dqm,
480 				struct qcm_process_device *qpd,
481 				struct queue *q)
482 {
483 	/* On GFX v7, CP doesn't flush TC at dequeue */
484 	if (q->device->adev->asic_type == CHIP_HAWAII)
485 		if (flush_texture_cache_nocpsch(q->device, qpd))
486 			pr_err("Failed to flush TC\n");
487 
488 	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
489 
490 	/* Release the vmid mapping */
491 	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
492 	dqm->vmid_pasid[qpd->vmid] = 0;
493 
494 	qpd->vmid = 0;
495 	q->properties.vmid = 0;
496 }
497 
/*
 * Create a user mode queue without the hardware scheduler (NO_HWS path).
 *
 * @dqm:               device queue manager
 * @q:                 queue being created
 * @qpd:               per-process device data the queue is added to
 * @qd:                optional CRIU private data; when non-NULL the SDMA ID,
 *                     doorbell ID and MQD contents are restored from it
 * @restore_mqd:       MQD image handed to restore_mqd() when @qd is set
 * @restore_ctl_stack: control stack image handed to restore_mqd() when @qd
 *                     is set
 *
 * Allocates, in order: a VMID (only for the first queue of the process), an
 * HQD slot or SDMA queue, a doorbell and the MQD. Active queues are then
 * loaded to hardware. On failure the resources acquired so far are released
 * in reverse order through the goto labels at the end of the function.
 *
 * Returns 0 on success or a negative errno.
 */
static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				const struct kfd_criu_queue_priv_data *qd,
				const void *restore_mqd, const void *restore_ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	dqm_lock(dqm);

	/* Enforce the per-device limit on the total number of queues */
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	/* The first queue of a process allocates the VMID for that process */
	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but is a no-op otherwise.
	 */
	q->properties.is_evicted = !!qpd->evicted;

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];
	/* Reserve the hardware slot matching the queue type */
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		retval = allocate_hqd(dqm, q);
		if (retval)
			goto deallocate_vmid;
		pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
		if (retval)
			goto deallocate_vmid;
		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	}

	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
	if (retval)
		goto out_deallocate_hqd;

	/* Temporarily release dqm lock to avoid a circular lock dependency */
	dqm_unlock(dqm);
	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
	dqm_lock(dqm);

	if (!q->mqd_mem_obj) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}

	/* Restore the MQD from the CRIU images, or initialize a fresh one */
	if (qd)
		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
				     &q->properties, restore_mqd, restore_ctl_stack,
				     qd->ctl_stack_size);
	else
		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
					&q->gart_mqd_addr, &q->properties);

	if (q->properties.is_active) {
		/* While the scheduler is stopped the MQD is not loaded, but
		 * the queue is still added to the list and counted below.
		 */
		if (!dqm->sched_running) {
			WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
			goto add_queue_to_list;
		}

		/* load_mqd() is given current->mm, so the caller must be
		 * running in the queue's own process.
		 */
		if (WARN(q->process->mm != current->mm,
					"should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
					q->queue, &q->properties, current->mm);
		if (retval)
			goto out_free_mqd;
	}

add_queue_to_list:
	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		increment_queue_count(dqm, qpd, q);

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	/* Success: retval is 0 here, skip the error unwinding */
	goto out_unlock;

	/* Error unwinding: release resources in reverse acquisition order */
out_free_mqd:
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
deallocate_vmid:
	/* Only drop the VMID if the process has no queue on its list */
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
out_unlock:
	dqm_unlock(dqm);
	return retval;
}
618 
619 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
620 {
621 	bool set;
622 	int pipe, bit, i;
623 
624 	set = false;
625 
626 	for (pipe = dqm->next_pipe_to_allocate, i = 0;
627 			i < get_pipes_per_mec(dqm);
628 			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
629 
630 		if (!is_pipe_enabled(dqm, 0, pipe))
631 			continue;
632 
633 		if (dqm->allocated_queues[pipe] != 0) {
634 			bit = ffs(dqm->allocated_queues[pipe]) - 1;
635 			dqm->allocated_queues[pipe] &= ~(1 << bit);
636 			q->pipe = pipe;
637 			q->queue = bit;
638 			set = true;
639 			break;
640 		}
641 	}
642 
643 	if (!set)
644 		return -EBUSY;
645 
646 	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
647 	/* horizontal hqd allocation */
648 	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
649 
650 	return 0;
651 }
652 
653 static inline void deallocate_hqd(struct device_queue_manager *dqm,
654 				struct queue *q)
655 {
656 	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
657 }
658 
659 #define SQ_IND_CMD_CMD_KILL		0x00000003
660 #define SQ_IND_CMD_MODE_BROADCAST	0x00000001
661 
662 static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
663 {
664 	int status = 0;
665 	unsigned int vmid;
666 	uint16_t queried_pasid;
667 	union SQ_CMD_BITS reg_sq_cmd;
668 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
669 	struct kfd_process_device *pdd;
670 	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
671 	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
672 
673 	reg_sq_cmd.u32All = 0;
674 	reg_gfx_index.u32All = 0;
675 
676 	pr_debug("Killing all process wavefronts\n");
677 
678 	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
679 		pr_err("no vmid pasid mapping supported \n");
680 		return -EOPNOTSUPP;
681 	}
682 
683 	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
684 	 * ATC_VMID15_PASID_MAPPING
685 	 * to check which VMID the current process is mapped to.
686 	 */
687 
688 	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
689 		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
690 				(dev->adev, vmid, &queried_pasid);
691 
692 		if (status && queried_pasid == p->pasid) {
693 			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
694 					vmid, p->pasid);
695 			break;
696 		}
697 	}
698 
699 	if (vmid > last_vmid_to_scan) {
700 		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
701 		return -EFAULT;
702 	}
703 
704 	/* taking the VMID for that process on the safe way using PDD */
705 	pdd = kfd_get_process_device_data(dev, p);
706 	if (!pdd)
707 		return -EFAULT;
708 
709 	reg_gfx_index.bits.sh_broadcast_writes = 1;
710 	reg_gfx_index.bits.se_broadcast_writes = 1;
711 	reg_gfx_index.bits.instance_broadcast_writes = 1;
712 	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
713 	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
714 	reg_sq_cmd.bits.vm_id = vmid;
715 
716 	dev->kfd2kgd->wave_control_execute(dev->adev,
717 					reg_gfx_index.u32All,
718 					reg_sq_cmd.u32All);
719 
720 	return 0;
721 }
722 
723 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
724  * to avoid asynchronized access
725  */
726 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
727 				struct qcm_process_device *qpd,
728 				struct queue *q)
729 {
730 	int retval;
731 	struct mqd_manager *mqd_mgr;
732 
733 	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
734 			q->properties.type)];
735 
736 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
737 		deallocate_hqd(dqm, q);
738 	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
739 		deallocate_sdma_queue(dqm, q);
740 	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
741 		deallocate_sdma_queue(dqm, q);
742 	else {
743 		pr_debug("q->properties.type %d is invalid\n",
744 				q->properties.type);
745 		return -EINVAL;
746 	}
747 	dqm->total_queue_count--;
748 
749 	deallocate_doorbell(qpd, q);
750 
751 	if (!dqm->sched_running) {
752 		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
753 		return 0;
754 	}
755 
756 	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
757 				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
758 				KFD_UNMAP_LATENCY_MS,
759 				q->pipe, q->queue);
760 	if (retval == -ETIME)
761 		qpd->reset_wavefronts = true;
762 
763 	list_del(&q->list);
764 	if (list_empty(&qpd->queues_list)) {
765 		if (qpd->reset_wavefronts) {
766 			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
767 					dqm->dev);
768 			/* dbgdev_wave_reset_wavefronts has to be called before
769 			 * deallocate_vmid(), i.e. when vmid is still in use.
770 			 */
771 			dbgdev_wave_reset_wavefronts(dqm->dev,
772 					qpd->pqm->process);
773 			qpd->reset_wavefronts = false;
774 		}
775 
776 		deallocate_vmid(dqm, qpd, q);
777 	}
778 	qpd->queue_count--;
779 	if (q->properties.is_active)
780 		decrement_queue_count(dqm, qpd, q);
781 
782 	return retval;
783 }
784 
785 static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
786 				struct qcm_process_device *qpd,
787 				struct queue *q)
788 {
789 	int retval;
790 	uint64_t sdma_val = 0;
791 	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
792 	struct mqd_manager *mqd_mgr =
793 		dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
794 
795 	/* Get the SDMA queue stats */
796 	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
797 	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
798 		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
799 							&sdma_val);
800 		if (retval)
801 			pr_err("Failed to read SDMA queue counter for queue: %d\n",
802 				q->properties.queue_id);
803 	}
804 
805 	dqm_lock(dqm);
806 	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
807 	if (!retval)
808 		pdd->sdma_past_activity_counter += sdma_val;
809 	dqm_unlock(dqm);
810 
811 	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
812 
813 	return retval;
814 }
815 
/*
 * Apply updated queue properties to an existing queue.
 *
 * @dqm:   device queue manager
 * @q:     queue whose properties have changed
 * @minfo: extra update information passed through to update_mqd()
 *
 * The queue is first taken off the hardware (unmap via HWS, removal from
 * MES, or destroy_mqd() without HWS), then the MQD is updated, the active
 * and GWS counters are adjusted, and finally the queue is put back on the
 * hardware if it is active.
 *
 * Returns 0 on success, -ENODEV when no process device data exists for the
 * queue, or the error of the failing unmap/map step.
 */
static int update_queue(struct device_queue_manager *dqm, struct queue *q,
			struct mqd_update_info *minfo)
{
	int retval = 0;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		/* Without MES all dynamic queues are unmapped; with MES only
		 * this queue is removed, and only if it was active.
		 */
		if (!dqm->dev->shared_resources.enable_mes)
			retval = unmap_queues_cpsch(dqm,
						    KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
		else if (prev_active)
			retval = remove_queue_mes(dqm, q, &pdd->qpd);

		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {

		/* Scheduler stopped: bail out with retval still 0 and the
		 * MQD left untouched.
		 */
		if (!dqm->sched_running) {
			WARN_ONCE(1, "Update non-HWS queue while stopped\n");
			goto out_unlock;
		}

		/* Preempt by saving waves when CWSR is enabled, else drain */
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				(dqm->dev->cwsr_enabled ?
				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->active_queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active) {
		increment_queue_count(dqm, &pdd->qpd, q);
	} else if (!q->properties.is_active && prev_active) {
		decrement_queue_count(dqm, &pdd->qpd, q);
	} else if (q->gws && !q->properties.is_gws) {
		/* Activity unchanged, but the queue gained GWS */
		if (q->properties.is_active) {
			dqm->gws_queue_count++;
			pdd->qpd.mapped_gws_queue = true;
		}
		q->properties.is_gws = true;
	} else if (!q->gws && q->properties.is_gws) {
		/* Activity unchanged, but the queue lost GWS */
		if (q->properties.is_active) {
			dqm->gws_queue_count--;
			pdd->qpd.mapped_gws_queue = false;
		}
		q->properties.is_gws = false;
	}

	/* Put the queue back on the hardware */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->shared_resources.enable_mes)
			retval = map_queues_cpsch(dqm);
		else if (q->properties.is_active)
			retval = add_queue_mes(dqm, q, &pdd->qpd);
	} else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		/* load_mqd() is given current->mm, so the caller must be
		 * running in the queue's own process.
		 */
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

out_unlock:
	dqm_unlock(dqm);
	return retval;
}
917 
918 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
919 					struct qcm_process_device *qpd)
920 {
921 	struct queue *q;
922 	struct mqd_manager *mqd_mgr;
923 	struct kfd_process_device *pdd;
924 	int retval, ret = 0;
925 
926 	dqm_lock(dqm);
927 	if (qpd->evicted++ > 0) /* already evicted, do nothing */
928 		goto out;
929 
930 	pdd = qpd_to_pdd(qpd);
931 	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
932 			    pdd->process->pasid);
933 
934 	pdd->last_evict_timestamp = get_jiffies_64();
935 	/* Mark all queues as evicted. Deactivate all active queues on
936 	 * the qpd.
937 	 */
938 	list_for_each_entry(q, &qpd->queues_list, list) {
939 		q->properties.is_evicted = true;
940 		if (!q->properties.is_active)
941 			continue;
942 
943 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
944 				q->properties.type)];
945 		q->properties.is_active = false;
946 		decrement_queue_count(dqm, qpd, q);
947 
948 		if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
949 			continue;
950 
951 		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
952 				(dqm->dev->cwsr_enabled ?
953 				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
954 				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
955 				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
956 		if (retval && !ret)
957 			/* Return the first error, but keep going to
958 			 * maintain a consistent eviction state
959 			 */
960 			ret = retval;
961 	}
962 
963 out:
964 	dqm_unlock(dqm);
965 	return ret;
966 }
967 
968 static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
969 				      struct qcm_process_device *qpd)
970 {
971 	struct queue *q;
972 	struct kfd_process_device *pdd;
973 	int retval = 0;
974 
975 	dqm_lock(dqm);
976 	if (qpd->evicted++ > 0) /* already evicted, do nothing */
977 		goto out;
978 
979 	pdd = qpd_to_pdd(qpd);
980 	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
981 			    pdd->process->pasid);
982 
983 	/* Mark all queues as evicted. Deactivate all active queues on
984 	 * the qpd.
985 	 */
986 	list_for_each_entry(q, &qpd->queues_list, list) {
987 		q->properties.is_evicted = true;
988 		if (!q->properties.is_active)
989 			continue;
990 
991 		q->properties.is_active = false;
992 		decrement_queue_count(dqm, qpd, q);
993 
994 		if (dqm->dev->shared_resources.enable_mes) {
995 			retval = remove_queue_mes(dqm, q, qpd);
996 			if (retval) {
997 				pr_err("Failed to evict queue %d\n",
998 					q->properties.queue_id);
999 				goto out;
1000 			}
1001 		}
1002 	}
1003 	pdd->last_evict_timestamp = get_jiffies_64();
1004 	if (!dqm->dev->shared_resources.enable_mes)
1005 		retval = execute_queues_cpsch(dqm,
1006 					      qpd->is_debug ?
1007 					      KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
1008 					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1009 
1010 out:
1011 	dqm_unlock(dqm);
1012 	return retval;
1013 }
1014 
1015 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
1016 					  struct qcm_process_device *qpd)
1017 {
1018 	struct mm_struct *mm = NULL;
1019 	struct queue *q;
1020 	struct mqd_manager *mqd_mgr;
1021 	struct kfd_process_device *pdd;
1022 	uint64_t pd_base;
1023 	uint64_t eviction_duration;
1024 	int retval, ret = 0;
1025 
1026 	pdd = qpd_to_pdd(qpd);
1027 	/* Retrieve PD base */
1028 	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
1029 
1030 	dqm_lock(dqm);
1031 	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
1032 		goto out;
1033 	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
1034 		qpd->evicted--;
1035 		goto out;
1036 	}
1037 
1038 	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
1039 			    pdd->process->pasid);
1040 
1041 	/* Update PD Base in QPD */
1042 	qpd->page_table_base = pd_base;
1043 	pr_debug("Updated PD address to 0x%llx\n", pd_base);
1044 
1045 	if (!list_empty(&qpd->queues_list)) {
1046 		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
1047 				dqm->dev->adev,
1048 				qpd->vmid,
1049 				qpd->page_table_base);
1050 		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
1051 	}
1052 
1053 	/* Take a safe reference to the mm_struct, which may otherwise
1054 	 * disappear even while the kfd_process is still referenced.
1055 	 */
1056 	mm = get_task_mm(pdd->process->lead_thread);
1057 	if (!mm) {
1058 		ret = -EFAULT;
1059 		goto out;
1060 	}
1061 
1062 	/* Remove the eviction flags. Activate queues that are not
1063 	 * inactive for other reasons.
1064 	 */
1065 	list_for_each_entry(q, &qpd->queues_list, list) {
1066 		q->properties.is_evicted = false;
1067 		if (!QUEUE_IS_ACTIVE(q->properties))
1068 			continue;
1069 
1070 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1071 				q->properties.type)];
1072 		q->properties.is_active = true;
1073 		increment_queue_count(dqm, qpd, q);
1074 
1075 		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
1076 			continue;
1077 
1078 		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
1079 				       q->queue, &q->properties, mm);
1080 		if (retval && !ret)
1081 			/* Return the first error, but keep going to
1082 			 * maintain a consistent eviction state
1083 			 */
1084 			ret = retval;
1085 	}
1086 	qpd->evicted = 0;
1087 	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
1088 	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
1089 out:
1090 	if (mm)
1091 		mmput(mm);
1092 	dqm_unlock(dqm);
1093 	return ret;
1094 }
1095 
1096 static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
1097 					struct qcm_process_device *qpd)
1098 {
1099 	struct queue *q;
1100 	struct kfd_process_device *pdd;
1101 	uint64_t pd_base;
1102 	uint64_t eviction_duration;
1103 	int retval = 0;
1104 
1105 	pdd = qpd_to_pdd(qpd);
1106 	/* Retrieve PD base */
1107 	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
1108 
1109 	dqm_lock(dqm);
1110 	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
1111 		goto out;
1112 	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
1113 		qpd->evicted--;
1114 		goto out;
1115 	}
1116 
1117 	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
1118 			    pdd->process->pasid);
1119 
1120 	/* Update PD Base in QPD */
1121 	qpd->page_table_base = pd_base;
1122 	pr_debug("Updated PD address to 0x%llx\n", pd_base);
1123 
1124 	/* activate all active queues on the qpd */
1125 	list_for_each_entry(q, &qpd->queues_list, list) {
1126 		q->properties.is_evicted = false;
1127 		if (!QUEUE_IS_ACTIVE(q->properties))
1128 			continue;
1129 
1130 		q->properties.is_active = true;
1131 		increment_queue_count(dqm, &pdd->qpd, q);
1132 
1133 		if (dqm->dev->shared_resources.enable_mes) {
1134 			retval = add_queue_mes(dqm, q, qpd);
1135 			if (retval) {
1136 				pr_err("Failed to restore queue %d\n",
1137 					q->properties.queue_id);
1138 				goto out;
1139 			}
1140 		}
1141 	}
1142 	if (!dqm->dev->shared_resources.enable_mes)
1143 		retval = execute_queues_cpsch(dqm,
1144 					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1145 	qpd->evicted = 0;
1146 	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
1147 	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
1148 out:
1149 	dqm_unlock(dqm);
1150 	return retval;
1151 }
1152 
1153 static int register_process(struct device_queue_manager *dqm,
1154 					struct qcm_process_device *qpd)
1155 {
1156 	struct device_process_node *n;
1157 	struct kfd_process_device *pdd;
1158 	uint64_t pd_base;
1159 	int retval;
1160 
1161 	n = kzalloc(sizeof(*n), GFP_KERNEL);
1162 	if (!n)
1163 		return -ENOMEM;
1164 
1165 	n->qpd = qpd;
1166 
1167 	pdd = qpd_to_pdd(qpd);
1168 	/* Retrieve PD base */
1169 	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
1170 
1171 	dqm_lock(dqm);
1172 	list_add(&n->list, &dqm->queues);
1173 
1174 	/* Update PD Base in QPD */
1175 	qpd->page_table_base = pd_base;
1176 	pr_debug("Updated PD address to 0x%llx\n", pd_base);
1177 
1178 	retval = dqm->asic_ops.update_qpd(dqm, qpd);
1179 
1180 	dqm->processes_count++;
1181 
1182 	dqm_unlock(dqm);
1183 
1184 	/* Outside the DQM lock because under the DQM lock we can't do
1185 	 * reclaim or take other locks that others hold while reclaiming.
1186 	 */
1187 	kfd_inc_compute_active(dqm->dev);
1188 
1189 	return retval;
1190 }
1191 
1192 static int unregister_process(struct device_queue_manager *dqm,
1193 					struct qcm_process_device *qpd)
1194 {
1195 	int retval;
1196 	struct device_process_node *cur, *next;
1197 
1198 	pr_debug("qpd->queues_list is %s\n",
1199 			list_empty(&qpd->queues_list) ? "empty" : "not empty");
1200 
1201 	retval = 0;
1202 	dqm_lock(dqm);
1203 
1204 	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
1205 		if (qpd == cur->qpd) {
1206 			list_del(&cur->list);
1207 			kfree(cur);
1208 			dqm->processes_count--;
1209 			goto out;
1210 		}
1211 	}
1212 	/* qpd not found in dqm list */
1213 	retval = 1;
1214 out:
1215 	dqm_unlock(dqm);
1216 
1217 	/* Outside the DQM lock because under the DQM lock we can't do
1218 	 * reclaim or take other locks that others hold while reclaiming.
1219 	 */
1220 	if (!retval)
1221 		kfd_dec_compute_active(dqm->dev);
1222 
1223 	return retval;
1224 }
1225 
1226 static int
1227 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
1228 			unsigned int vmid)
1229 {
1230 	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
1231 						dqm->dev->adev, pasid, vmid);
1232 }
1233 
1234 static void init_interrupts(struct device_queue_manager *dqm)
1235 {
1236 	unsigned int i;
1237 
1238 	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
1239 		if (is_pipe_enabled(dqm, 0, i))
1240 			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i);
1241 }
1242 
1243 static int initialize_nocpsch(struct device_queue_manager *dqm)
1244 {
1245 	int pipe, queue;
1246 
1247 	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1248 
1249 	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
1250 					sizeof(unsigned int), GFP_KERNEL);
1251 	if (!dqm->allocated_queues)
1252 		return -ENOMEM;
1253 
1254 	mutex_init(&dqm->lock_hidden);
1255 	INIT_LIST_HEAD(&dqm->queues);
1256 	dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
1257 	dqm->active_cp_queue_count = 0;
1258 	dqm->gws_queue_count = 0;
1259 
1260 	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
1261 		int pipe_offset = pipe * get_queues_per_pipe(dqm);
1262 
1263 		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
1264 			if (test_bit(pipe_offset + queue,
1265 				     dqm->dev->shared_resources.cp_queue_bitmap))
1266 				dqm->allocated_queues[pipe] |= 1 << queue;
1267 	}
1268 
1269 	memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
1270 
1271 	dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
1272 	dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
1273 	pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
1274 
1275 	dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
1276 
1277 	return 0;
1278 }
1279 
1280 static void uninitialize(struct device_queue_manager *dqm)
1281 {
1282 	int i;
1283 
1284 	WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
1285 
1286 	kfree(dqm->allocated_queues);
1287 	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
1288 		kfree(dqm->mqd_mgrs[i]);
1289 	mutex_destroy(&dqm->lock_hidden);
1290 }
1291 
1292 static int start_nocpsch(struct device_queue_manager *dqm)
1293 {
1294 	int r = 0;
1295 
1296 	pr_info("SW scheduler is used");
1297 	init_interrupts(dqm);
1298 
1299 	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
1300 		r = pm_init(&dqm->packet_mgr, dqm);
1301 	if (!r)
1302 		dqm->sched_running = true;
1303 
1304 	return r;
1305 }
1306 
1307 static int stop_nocpsch(struct device_queue_manager *dqm)
1308 {
1309 	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
1310 		pm_uninit(&dqm->packet_mgr, false);
1311 	dqm->sched_running = false;
1312 
1313 	return 0;
1314 }
1315 
1316 static void pre_reset(struct device_queue_manager *dqm)
1317 {
1318 	dqm_lock(dqm);
1319 	dqm->is_resetting = true;
1320 	dqm_unlock(dqm);
1321 }
1322 
1323 static int allocate_sdma_queue(struct device_queue_manager *dqm,
1324 				struct queue *q, const uint32_t *restore_sdma_id)
1325 {
1326 	int bit;
1327 
1328 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1329 		if (dqm->sdma_bitmap == 0) {
1330 			pr_err("No more SDMA queue to allocate\n");
1331 			return -ENOMEM;
1332 		}
1333 
1334 		if (restore_sdma_id) {
1335 			/* Re-use existing sdma_id */
1336 			if (!(dqm->sdma_bitmap & (1ULL << *restore_sdma_id))) {
1337 				pr_err("SDMA queue already in use\n");
1338 				return -EBUSY;
1339 			}
1340 			dqm->sdma_bitmap &= ~(1ULL << *restore_sdma_id);
1341 			q->sdma_id = *restore_sdma_id;
1342 		} else {
1343 			/* Find first available sdma_id */
1344 			bit = __ffs64(dqm->sdma_bitmap);
1345 			dqm->sdma_bitmap &= ~(1ULL << bit);
1346 			q->sdma_id = bit;
1347 		}
1348 
1349 		q->properties.sdma_engine_id = q->sdma_id %
1350 				kfd_get_num_sdma_engines(dqm->dev);
1351 		q->properties.sdma_queue_id = q->sdma_id /
1352 				kfd_get_num_sdma_engines(dqm->dev);
1353 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1354 		if (dqm->xgmi_sdma_bitmap == 0) {
1355 			pr_err("No more XGMI SDMA queue to allocate\n");
1356 			return -ENOMEM;
1357 		}
1358 		if (restore_sdma_id) {
1359 			/* Re-use existing sdma_id */
1360 			if (!(dqm->xgmi_sdma_bitmap & (1ULL << *restore_sdma_id))) {
1361 				pr_err("SDMA queue already in use\n");
1362 				return -EBUSY;
1363 			}
1364 			dqm->xgmi_sdma_bitmap &= ~(1ULL << *restore_sdma_id);
1365 			q->sdma_id = *restore_sdma_id;
1366 		} else {
1367 			bit = __ffs64(dqm->xgmi_sdma_bitmap);
1368 			dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
1369 			q->sdma_id = bit;
1370 		}
1371 		/* sdma_engine_id is sdma id including
1372 		 * both PCIe-optimized SDMAs and XGMI-
1373 		 * optimized SDMAs. The calculation below
1374 		 * assumes the first N engines are always
1375 		 * PCIe-optimized ones
1376 		 */
1377 		q->properties.sdma_engine_id =
1378 			kfd_get_num_sdma_engines(dqm->dev) +
1379 			q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
1380 		q->properties.sdma_queue_id = q->sdma_id /
1381 			kfd_get_num_xgmi_sdma_engines(dqm->dev);
1382 	}
1383 
1384 	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
1385 	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
1386 
1387 	return 0;
1388 }
1389 
1390 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
1391 				struct queue *q)
1392 {
1393 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1394 		if (q->sdma_id >= get_num_sdma_queues(dqm))
1395 			return;
1396 		dqm->sdma_bitmap |= (1ULL << q->sdma_id);
1397 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1398 		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
1399 			return;
1400 		dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
1401 	}
1402 }
1403 
1404 /*
1405  * Device Queue Manager implementation for cp scheduler
1406  */
1407 
1408 static int set_sched_resources(struct device_queue_manager *dqm)
1409 {
1410 	int i, mec;
1411 	struct scheduling_resources res;
1412 
1413 	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
1414 
1415 	res.queue_mask = 0;
1416 	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
1417 		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
1418 			/ dqm->dev->shared_resources.num_pipe_per_mec;
1419 
1420 		if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap))
1421 			continue;
1422 
1423 		/* only acquire queues from the first MEC */
1424 		if (mec > 0)
1425 			continue;
1426 
1427 		/* This situation may be hit in the future if a new HW
1428 		 * generation exposes more than 64 queues. If so, the
1429 		 * definition of res.queue_mask needs updating
1430 		 */
1431 		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
1432 			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
1433 			break;
1434 		}
1435 
1436 		res.queue_mask |= 1ull
1437 			<< amdgpu_queue_mask_bit_to_set_resource_bit(
1438 				dqm->dev->adev, i);
1439 	}
1440 	res.gws_mask = ~0ull;
1441 	res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
1442 
1443 	pr_debug("Scheduling resources:\n"
1444 			"vmid mask: 0x%8X\n"
1445 			"queue mask: 0x%8llX\n",
1446 			res.vmid_mask, res.queue_mask);
1447 
1448 	return pm_send_set_resources(&dqm->packet_mgr, &res);
1449 }
1450 
1451 static int initialize_cpsch(struct device_queue_manager *dqm)
1452 {
1453 	uint64_t num_sdma_queues;
1454 	uint64_t num_xgmi_sdma_queues;
1455 
1456 	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1457 
1458 	mutex_init(&dqm->lock_hidden);
1459 	INIT_LIST_HEAD(&dqm->queues);
1460 	dqm->active_queue_count = dqm->processes_count = 0;
1461 	dqm->active_cp_queue_count = 0;
1462 	dqm->gws_queue_count = 0;
1463 	dqm->active_runlist = false;
1464 
1465 	num_sdma_queues = get_num_sdma_queues(dqm);
1466 	if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap))
1467 		dqm->sdma_bitmap = ULLONG_MAX;
1468 	else
1469 		dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);
1470 
1471 	dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
1472 	pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
1473 
1474 	num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
1475 	if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap))
1476 		dqm->xgmi_sdma_bitmap = ULLONG_MAX;
1477 	else
1478 		dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1);
1479 
1480 	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
1481 
1482 	return 0;
1483 }
1484 
1485 static int start_cpsch(struct device_queue_manager *dqm)
1486 {
1487 	int retval;
1488 
1489 	retval = 0;
1490 
1491 	dqm_lock(dqm);
1492 
1493 	if (!dqm->dev->shared_resources.enable_mes) {
1494 		retval = pm_init(&dqm->packet_mgr, dqm);
1495 		if (retval)
1496 			goto fail_packet_manager_init;
1497 
1498 		retval = set_sched_resources(dqm);
1499 		if (retval)
1500 			goto fail_set_sched_resources;
1501 	}
1502 	pr_debug("Allocating fence memory\n");
1503 
1504 	/* allocate fence memory on the gart */
1505 	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
1506 					&dqm->fence_mem);
1507 
1508 	if (retval)
1509 		goto fail_allocate_vidmem;
1510 
1511 	dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
1512 	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
1513 
1514 	init_interrupts(dqm);
1515 
1516 	/* clear hang status when driver try to start the hw scheduler */
1517 	dqm->is_hws_hang = false;
1518 	dqm->is_resetting = false;
1519 	dqm->sched_running = true;
1520 	if (!dqm->dev->shared_resources.enable_mes)
1521 		execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1522 	dqm_unlock(dqm);
1523 
1524 	return 0;
1525 fail_allocate_vidmem:
1526 fail_set_sched_resources:
1527 	if (!dqm->dev->shared_resources.enable_mes)
1528 		pm_uninit(&dqm->packet_mgr, false);
1529 fail_packet_manager_init:
1530 	dqm_unlock(dqm);
1531 	return retval;
1532 }
1533 
1534 static int stop_cpsch(struct device_queue_manager *dqm)
1535 {
1536 	bool hanging;
1537 
1538 	dqm_lock(dqm);
1539 	if (!dqm->sched_running) {
1540 		dqm_unlock(dqm);
1541 		return 0;
1542 	}
1543 
1544 	if (!dqm->is_hws_hang) {
1545 		if (!dqm->dev->shared_resources.enable_mes)
1546 			unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
1547 		else
1548 			remove_all_queues_mes(dqm);
1549 	}
1550 
1551 	hanging = dqm->is_hws_hang || dqm->is_resetting;
1552 	dqm->sched_running = false;
1553 
1554 	if (!dqm->dev->shared_resources.enable_mes)
1555 		pm_release_ib(&dqm->packet_mgr);
1556 
1557 	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
1558 	if (!dqm->dev->shared_resources.enable_mes)
1559 		pm_uninit(&dqm->packet_mgr, hanging);
1560 	dqm_unlock(dqm);
1561 
1562 	return 0;
1563 }
1564 
1565 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
1566 					struct kernel_queue *kq,
1567 					struct qcm_process_device *qpd)
1568 {
1569 	dqm_lock(dqm);
1570 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1571 		pr_warn("Can't create new kernel queue because %d queues were already created\n",
1572 				dqm->total_queue_count);
1573 		dqm_unlock(dqm);
1574 		return -EPERM;
1575 	}
1576 
1577 	/*
1578 	 * Unconditionally increment this counter, regardless of the queue's
1579 	 * type or whether the queue is active.
1580 	 */
1581 	dqm->total_queue_count++;
1582 	pr_debug("Total of %d queues are accountable so far\n",
1583 			dqm->total_queue_count);
1584 
1585 	list_add(&kq->list, &qpd->priv_queue_list);
1586 	increment_queue_count(dqm, qpd, kq->queue);
1587 	qpd->is_debug = true;
1588 	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1589 	dqm_unlock(dqm);
1590 
1591 	return 0;
1592 }
1593 
1594 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
1595 					struct kernel_queue *kq,
1596 					struct qcm_process_device *qpd)
1597 {
1598 	dqm_lock(dqm);
1599 	list_del(&kq->list);
1600 	decrement_queue_count(dqm, qpd, kq->queue);
1601 	qpd->is_debug = false;
1602 	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1603 	/*
1604 	 * Unconditionally decrement this counter, regardless of the queue's
1605 	 * type.
1606 	 */
1607 	dqm->total_queue_count--;
1608 	pr_debug("Total of %d queues are accountable so far\n",
1609 			dqm->total_queue_count);
1610 	dqm_unlock(dqm);
1611 }
1612 
1613 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1614 			struct qcm_process_device *qpd,
1615 			const struct kfd_criu_queue_priv_data *qd,
1616 			const void *restore_mqd, const void *restore_ctl_stack)
1617 {
1618 	int retval;
1619 	struct mqd_manager *mqd_mgr;
1620 
1621 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1622 		pr_warn("Can't create new usermode queue because %d queues were already created\n",
1623 				dqm->total_queue_count);
1624 		retval = -EPERM;
1625 		goto out;
1626 	}
1627 
1628 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1629 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1630 		dqm_lock(dqm);
1631 		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
1632 		dqm_unlock(dqm);
1633 		if (retval)
1634 			goto out;
1635 	}
1636 
1637 	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
1638 	if (retval)
1639 		goto out_deallocate_sdma_queue;
1640 
1641 	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1642 			q->properties.type)];
1643 
1644 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1645 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1646 		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1647 	q->properties.tba_addr = qpd->tba_addr;
1648 	q->properties.tma_addr = qpd->tma_addr;
1649 	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
1650 	if (!q->mqd_mem_obj) {
1651 		retval = -ENOMEM;
1652 		goto out_deallocate_doorbell;
1653 	}
1654 
1655 	dqm_lock(dqm);
1656 	/*
1657 	 * Eviction state logic: mark all queues as evicted, even ones
1658 	 * not currently active. Restoring inactive queues later only
1659 	 * updates the is_evicted flag but is a no-op otherwise.
1660 	 */
1661 	q->properties.is_evicted = !!qpd->evicted;
1662 
1663 	if (qd)
1664 		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
1665 				     &q->properties, restore_mqd, restore_ctl_stack,
1666 				     qd->ctl_stack_size);
1667 	else
1668 		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
1669 					&q->gart_mqd_addr, &q->properties);
1670 
1671 	list_add(&q->list, &qpd->queues_list);
1672 	qpd->queue_count++;
1673 
1674 	if (q->properties.is_active) {
1675 		increment_queue_count(dqm, qpd, q);
1676 
1677 		if (!dqm->dev->shared_resources.enable_mes)
1678 			retval = execute_queues_cpsch(dqm,
1679 					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1680 		else
1681 			retval = add_queue_mes(dqm, q, qpd);
1682 		if (retval)
1683 			goto cleanup_queue;
1684 	}
1685 
1686 	/*
1687 	 * Unconditionally increment this counter, regardless of the queue's
1688 	 * type or whether the queue is active.
1689 	 */
1690 	dqm->total_queue_count++;
1691 
1692 	pr_debug("Total of %d queues are accountable so far\n",
1693 			dqm->total_queue_count);
1694 
1695 	dqm_unlock(dqm);
1696 	return retval;
1697 
1698 cleanup_queue:
1699 	qpd->queue_count--;
1700 	list_del(&q->list);
1701 	if (q->properties.is_active)
1702 		decrement_queue_count(dqm, qpd, q);
1703 	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1704 	dqm_unlock(dqm);
1705 out_deallocate_doorbell:
1706 	deallocate_doorbell(qpd, q);
1707 out_deallocate_sdma_queue:
1708 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1709 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1710 		dqm_lock(dqm);
1711 		deallocate_sdma_queue(dqm, q);
1712 		dqm_unlock(dqm);
1713 	}
1714 out:
1715 	return retval;
1716 }
1717 
1718 int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
1719 				uint64_t fence_value,
1720 				unsigned int timeout_ms)
1721 {
1722 	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
1723 
1724 	while (*fence_addr != fence_value) {
1725 		if (time_after(jiffies, end_jiffies)) {
1726 			pr_err("qcm fence wait loop timeout expired\n");
1727 			/* In HWS case, this is used to halt the driver thread
1728 			 * in order not to mess up CP states before doing
1729 			 * scandumps for FW debugging.
1730 			 */
1731 			while (halt_if_hws_hang)
1732 				schedule();
1733 
1734 			return -ETIME;
1735 		}
1736 		schedule();
1737 	}
1738 
1739 	return 0;
1740 }
1741 
1742 /* dqm->lock mutex has to be locked before calling this function */
1743 static int map_queues_cpsch(struct device_queue_manager *dqm)
1744 {
1745 	int retval;
1746 
1747 	if (!dqm->sched_running)
1748 		return 0;
1749 	if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
1750 		return 0;
1751 	if (dqm->active_runlist)
1752 		return 0;
1753 
1754 	retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
1755 	pr_debug("%s sent runlist\n", __func__);
1756 	if (retval) {
1757 		pr_err("failed to execute runlist\n");
1758 		return retval;
1759 	}
1760 	dqm->active_runlist = true;
1761 
1762 	return retval;
1763 }
1764 
1765 /* dqm->lock mutex has to be locked before calling this function */
1766 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1767 				enum kfd_unmap_queues_filter filter,
1768 				uint32_t filter_param, bool reset)
1769 {
1770 	int retval = 0;
1771 	struct mqd_manager *mqd_mgr;
1772 
1773 	if (!dqm->sched_running)
1774 		return 0;
1775 	if (dqm->is_hws_hang || dqm->is_resetting)
1776 		return -EIO;
1777 	if (!dqm->active_runlist)
1778 		return retval;
1779 
1780 	retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
1781 	if (retval)
1782 		return retval;
1783 
1784 	*dqm->fence_addr = KFD_FENCE_INIT;
1785 	pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
1786 				KFD_FENCE_COMPLETED);
1787 	/* should be timed out */
1788 	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1789 				queue_preemption_timeout_ms);
1790 	if (retval) {
1791 		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
1792 		kfd_hws_hang(dqm);
1793 		return retval;
1794 	}
1795 
1796 	/* In the current MEC firmware implementation, if compute queue
1797 	 * doesn't response to the preemption request in time, HIQ will
1798 	 * abandon the unmap request without returning any timeout error
1799 	 * to driver. Instead, MEC firmware will log the doorbell of the
1800 	 * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields.
1801 	 * To make sure the queue unmap was successful, driver need to
1802 	 * check those fields
1803 	 */
1804 	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
1805 	if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
1806 		pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
1807 		while (halt_if_hws_hang)
1808 			schedule();
1809 		return -ETIME;
1810 	}
1811 
1812 	pm_release_ib(&dqm->packet_mgr);
1813 	dqm->active_runlist = false;
1814 
1815 	return retval;
1816 }
1817 
1818 /* only for compute queue */
1819 static int reset_queues_cpsch(struct device_queue_manager *dqm,
1820 			uint16_t pasid)
1821 {
1822 	int retval;
1823 
1824 	dqm_lock(dqm);
1825 
1826 	retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
1827 			pasid, true);
1828 
1829 	dqm_unlock(dqm);
1830 	return retval;
1831 }
1832 
1833 /* dqm->lock mutex has to be locked before calling this function */
1834 static int execute_queues_cpsch(struct device_queue_manager *dqm,
1835 				enum kfd_unmap_queues_filter filter,
1836 				uint32_t filter_param)
1837 {
1838 	int retval;
1839 
1840 	if (dqm->is_hws_hang)
1841 		return -EIO;
1842 	retval = unmap_queues_cpsch(dqm, filter, filter_param, false);
1843 	if (retval)
1844 		return retval;
1845 
1846 	return map_queues_cpsch(dqm);
1847 }
1848 
1849 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1850 				struct qcm_process_device *qpd,
1851 				struct queue *q)
1852 {
1853 	int retval;
1854 	struct mqd_manager *mqd_mgr;
1855 	uint64_t sdma_val = 0;
1856 	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
1857 
1858 	/* Get the SDMA queue stats */
1859 	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1860 	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1861 		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
1862 							&sdma_val);
1863 		if (retval)
1864 			pr_err("Failed to read SDMA queue counter for queue: %d\n",
1865 				q->properties.queue_id);
1866 	}
1867 
1868 	retval = 0;
1869 
1870 	/* remove queue from list to prevent rescheduling after preemption */
1871 	dqm_lock(dqm);
1872 
1873 	if (qpd->is_debug) {
1874 		/*
1875 		 * error, currently we do not allow to destroy a queue
1876 		 * of a currently debugged process
1877 		 */
1878 		retval = -EBUSY;
1879 		goto failed_try_destroy_debugged_queue;
1880 
1881 	}
1882 
1883 	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1884 			q->properties.type)];
1885 
1886 	deallocate_doorbell(qpd, q);
1887 
1888 	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1889 	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1890 		deallocate_sdma_queue(dqm, q);
1891 		pdd->sdma_past_activity_counter += sdma_val;
1892 	}
1893 
1894 	list_del(&q->list);
1895 	qpd->queue_count--;
1896 	if (q->properties.is_active) {
1897 		if (!dqm->dev->shared_resources.enable_mes) {
1898 			decrement_queue_count(dqm, qpd, q);
1899 			retval = execute_queues_cpsch(dqm,
1900 						      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1901 			if (retval == -ETIME)
1902 				qpd->reset_wavefronts = true;
1903 		} else {
1904 			retval = remove_queue_mes(dqm, q, qpd);
1905 		}
1906 	}
1907 
1908 	/*
1909 	 * Unconditionally decrement this counter, regardless of the queue's
1910 	 * type
1911 	 */
1912 	dqm->total_queue_count--;
1913 	pr_debug("Total of %d queues are accountable so far\n",
1914 			dqm->total_queue_count);
1915 
1916 	dqm_unlock(dqm);
1917 
1918 	/* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
1919 	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1920 
1921 	return retval;
1922 
1923 failed_try_destroy_debugged_queue:
1924 
1925 	dqm_unlock(dqm);
1926 	return retval;
1927 }
1928 
1929 /*
1930  * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
1931  * stay in user mode.
1932  */
1933 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
1934 /* APE1 limit is inclusive and 64K aligned. */
1935 #define APE1_LIMIT_ALIGNMENT 0xFFFF
1936 
1937 static bool set_cache_memory_policy(struct device_queue_manager *dqm,
1938 				   struct qcm_process_device *qpd,
1939 				   enum cache_policy default_policy,
1940 				   enum cache_policy alternate_policy,
1941 				   void __user *alternate_aperture_base,
1942 				   uint64_t alternate_aperture_size)
1943 {
1944 	bool retval = true;
1945 
1946 	if (!dqm->asic_ops.set_cache_memory_policy)
1947 		return retval;
1948 
1949 	dqm_lock(dqm);
1950 
1951 	if (alternate_aperture_size == 0) {
1952 		/* base > limit disables APE1 */
1953 		qpd->sh_mem_ape1_base = 1;
1954 		qpd->sh_mem_ape1_limit = 0;
1955 	} else {
1956 		/*
1957 		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
1958 		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
1959 		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
1960 		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
1961 		 * Verify that the base and size parameters can be
1962 		 * represented in this format and convert them.
1963 		 * Additionally restrict APE1 to user-mode addresses.
1964 		 */
1965 
1966 		uint64_t base = (uintptr_t)alternate_aperture_base;
1967 		uint64_t limit = base + alternate_aperture_size - 1;
1968 
1969 		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
1970 		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
1971 			retval = false;
1972 			goto out;
1973 		}
1974 
1975 		qpd->sh_mem_ape1_base = base >> 16;
1976 		qpd->sh_mem_ape1_limit = limit >> 16;
1977 	}
1978 
1979 	retval = dqm->asic_ops.set_cache_memory_policy(
1980 			dqm,
1981 			qpd,
1982 			default_policy,
1983 			alternate_policy,
1984 			alternate_aperture_base,
1985 			alternate_aperture_size);
1986 
1987 	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
1988 		program_sh_mem_settings(dqm, qpd);
1989 
1990 	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
1991 		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
1992 		qpd->sh_mem_ape1_limit);
1993 
1994 out:
1995 	dqm_unlock(dqm);
1996 	return retval;
1997 }
1998 
1999 static int process_termination_nocpsch(struct device_queue_manager *dqm,
2000 		struct qcm_process_device *qpd)
2001 {
2002 	struct queue *q;
2003 	struct device_process_node *cur, *next_dpn;
2004 	int retval = 0;
2005 	bool found = false;
2006 
2007 	dqm_lock(dqm);
2008 
2009 	/* Clear all user mode queues */
2010 	while (!list_empty(&qpd->queues_list)) {
2011 		struct mqd_manager *mqd_mgr;
2012 		int ret;
2013 
2014 		q = list_first_entry(&qpd->queues_list, struct queue, list);
2015 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
2016 				q->properties.type)];
2017 		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
2018 		if (ret)
2019 			retval = ret;
2020 		dqm_unlock(dqm);
2021 		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
2022 		dqm_lock(dqm);
2023 	}
2024 
2025 	/* Unregister process */
2026 	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
2027 		if (qpd == cur->qpd) {
2028 			list_del(&cur->list);
2029 			kfree(cur);
2030 			dqm->processes_count--;
2031 			found = true;
2032 			break;
2033 		}
2034 	}
2035 
2036 	dqm_unlock(dqm);
2037 
2038 	/* Outside the DQM lock because under the DQM lock we can't do
2039 	 * reclaim or take other locks that others hold while reclaiming.
2040 	 */
2041 	if (found)
2042 		kfd_dec_compute_active(dqm->dev);
2043 
2044 	return retval;
2045 }
2046 
2047 static int get_wave_state(struct device_queue_manager *dqm,
2048 			  struct queue *q,
2049 			  void __user *ctl_stack,
2050 			  u32 *ctl_stack_used_size,
2051 			  u32 *save_area_used_size)
2052 {
2053 	struct mqd_manager *mqd_mgr;
2054 
2055 	dqm_lock(dqm);
2056 
2057 	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
2058 
2059 	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
2060 	    q->properties.is_active || !q->device->cwsr_enabled ||
2061 	    !mqd_mgr->get_wave_state) {
2062 		dqm_unlock(dqm);
2063 		return -EINVAL;
2064 	}
2065 
2066 	dqm_unlock(dqm);
2067 
2068 	/*
2069 	 * get_wave_state is outside the dqm lock to prevent circular locking
2070 	 * and the queue should be protected against destruction by the process
2071 	 * lock.
2072 	 */
2073 	return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
2074 			ctl_stack_used_size, save_area_used_size);
2075 }
2076 
2077 static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
2078 			const struct queue *q,
2079 			u32 *mqd_size,
2080 			u32 *ctl_stack_size)
2081 {
2082 	struct mqd_manager *mqd_mgr;
2083 	enum KFD_MQD_TYPE mqd_type =
2084 			get_mqd_type_from_queue_type(q->properties.type);
2085 
2086 	dqm_lock(dqm);
2087 	mqd_mgr = dqm->mqd_mgrs[mqd_type];
2088 	*mqd_size = mqd_mgr->mqd_size;
2089 	*ctl_stack_size = 0;
2090 
2091 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
2092 		mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
2093 
2094 	dqm_unlock(dqm);
2095 }
2096 
2097 static int checkpoint_mqd(struct device_queue_manager *dqm,
2098 			  const struct queue *q,
2099 			  void *mqd,
2100 			  void *ctl_stack)
2101 {
2102 	struct mqd_manager *mqd_mgr;
2103 	int r = 0;
2104 	enum KFD_MQD_TYPE mqd_type =
2105 			get_mqd_type_from_queue_type(q->properties.type);
2106 
2107 	dqm_lock(dqm);
2108 
2109 	if (q->properties.is_active || !q->device->cwsr_enabled) {
2110 		r = -EINVAL;
2111 		goto dqm_unlock;
2112 	}
2113 
2114 	mqd_mgr = dqm->mqd_mgrs[mqd_type];
2115 	if (!mqd_mgr->checkpoint_mqd) {
2116 		r = -EOPNOTSUPP;
2117 		goto dqm_unlock;
2118 	}
2119 
2120 	mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);
2121 
2122 dqm_unlock:
2123 	dqm_unlock(dqm);
2124 	return r;
2125 }
2126 
2127 static int process_termination_cpsch(struct device_queue_manager *dqm,
2128 		struct qcm_process_device *qpd)
2129 {
2130 	int retval;
2131 	struct queue *q;
2132 	struct kernel_queue *kq, *kq_next;
2133 	struct mqd_manager *mqd_mgr;
2134 	struct device_process_node *cur, *next_dpn;
2135 	enum kfd_unmap_queues_filter filter =
2136 		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
2137 	bool found = false;
2138 
2139 	retval = 0;
2140 
2141 	dqm_lock(dqm);
2142 
2143 	/* Clean all kernel queues */
2144 	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
2145 		list_del(&kq->list);
2146 		decrement_queue_count(dqm, qpd, kq->queue);
2147 		qpd->is_debug = false;
2148 		dqm->total_queue_count--;
2149 		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
2150 	}
2151 
2152 	/* Clear all user mode queues */
2153 	list_for_each_entry(q, &qpd->queues_list, list) {
2154 		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
2155 			deallocate_sdma_queue(dqm, q);
2156 		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
2157 			deallocate_sdma_queue(dqm, q);
2158 
2159 		if (q->properties.is_active) {
2160 			decrement_queue_count(dqm, qpd, q);
2161 
2162 			if (dqm->dev->shared_resources.enable_mes) {
2163 				retval = remove_queue_mes(dqm, q, qpd);
2164 				if (retval)
2165 					pr_err("Failed to remove queue %d\n",
2166 						q->properties.queue_id);
2167 			}
2168 		}
2169 
2170 		dqm->total_queue_count--;
2171 	}
2172 
2173 	/* Unregister process */
2174 	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
2175 		if (qpd == cur->qpd) {
2176 			list_del(&cur->list);
2177 			kfree(cur);
2178 			dqm->processes_count--;
2179 			found = true;
2180 			break;
2181 		}
2182 	}
2183 
2184 	if (!dqm->dev->shared_resources.enable_mes)
2185 		retval = execute_queues_cpsch(dqm, filter, 0);
2186 
2187 	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
2188 		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
2189 		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
2190 		qpd->reset_wavefronts = false;
2191 	}
2192 
2193 	/* Lastly, free mqd resources.
2194 	 * Do free_mqd() after dqm_unlock to avoid circular locking.
2195 	 */
2196 	while (!list_empty(&qpd->queues_list)) {
2197 		q = list_first_entry(&qpd->queues_list, struct queue, list);
2198 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
2199 				q->properties.type)];
2200 		list_del(&q->list);
2201 		qpd->queue_count--;
2202 		dqm_unlock(dqm);
2203 		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
2204 		dqm_lock(dqm);
2205 	}
2206 	dqm_unlock(dqm);
2207 
2208 	/* Outside the DQM lock because under the DQM lock we can't do
2209 	 * reclaim or take other locks that others hold while reclaiming.
2210 	 */
2211 	if (found)
2212 		kfd_dec_compute_active(dqm->dev);
2213 
2214 	return retval;
2215 }
2216 
2217 static int init_mqd_managers(struct device_queue_manager *dqm)
2218 {
2219 	int i, j;
2220 	struct mqd_manager *mqd_mgr;
2221 
2222 	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
2223 		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
2224 		if (!mqd_mgr) {
2225 			pr_err("mqd manager [%d] initialization failed\n", i);
2226 			goto out_free;
2227 		}
2228 		dqm->mqd_mgrs[i] = mqd_mgr;
2229 	}
2230 
2231 	return 0;
2232 
2233 out_free:
2234 	for (j = 0; j < i; j++) {
2235 		kfree(dqm->mqd_mgrs[j]);
2236 		dqm->mqd_mgrs[j] = NULL;
2237 	}
2238 
2239 	return -ENOMEM;
2240 }
2241 
2242 /* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/
2243 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
2244 {
2245 	int retval;
2246 	struct kfd_dev *dev = dqm->dev;
2247 	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
2248 	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
2249 		get_num_all_sdma_engines(dqm) *
2250 		dev->device_info.num_sdma_queues_per_engine +
2251 		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
2252 
2253 	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
2254 		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
2255 		(void *)&(mem_obj->cpu_ptr), false);
2256 
2257 	return retval;
2258 }
2259 
2260 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
2261 {
2262 	struct device_queue_manager *dqm;
2263 
2264 	pr_debug("Loading device queue manager\n");
2265 
2266 	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
2267 	if (!dqm)
2268 		return NULL;
2269 
2270 	switch (dev->adev->asic_type) {
2271 	/* HWS is not available on Hawaii. */
2272 	case CHIP_HAWAII:
2273 	/* HWS depends on CWSR for timely dequeue. CWSR is not
2274 	 * available on Tonga.
2275 	 *
2276 	 * FIXME: This argument also applies to Kaveri.
2277 	 */
2278 	case CHIP_TONGA:
2279 		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
2280 		break;
2281 	default:
2282 		dqm->sched_policy = sched_policy;
2283 		break;
2284 	}
2285 
2286 	dqm->dev = dev;
2287 	switch (dqm->sched_policy) {
2288 	case KFD_SCHED_POLICY_HWS:
2289 	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
2290 		/* initialize dqm for cp scheduling */
2291 		dqm->ops.create_queue = create_queue_cpsch;
2292 		dqm->ops.initialize = initialize_cpsch;
2293 		dqm->ops.start = start_cpsch;
2294 		dqm->ops.stop = stop_cpsch;
2295 		dqm->ops.pre_reset = pre_reset;
2296 		dqm->ops.destroy_queue = destroy_queue_cpsch;
2297 		dqm->ops.update_queue = update_queue;
2298 		dqm->ops.register_process = register_process;
2299 		dqm->ops.unregister_process = unregister_process;
2300 		dqm->ops.uninitialize = uninitialize;
2301 		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
2302 		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
2303 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
2304 		dqm->ops.process_termination = process_termination_cpsch;
2305 		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
2306 		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
2307 		dqm->ops.get_wave_state = get_wave_state;
2308 		dqm->ops.reset_queues = reset_queues_cpsch;
2309 		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
2310 		dqm->ops.checkpoint_mqd = checkpoint_mqd;
2311 		break;
2312 	case KFD_SCHED_POLICY_NO_HWS:
2313 		/* initialize dqm for no cp scheduling */
2314 		dqm->ops.start = start_nocpsch;
2315 		dqm->ops.stop = stop_nocpsch;
2316 		dqm->ops.pre_reset = pre_reset;
2317 		dqm->ops.create_queue = create_queue_nocpsch;
2318 		dqm->ops.destroy_queue = destroy_queue_nocpsch;
2319 		dqm->ops.update_queue = update_queue;
2320 		dqm->ops.register_process = register_process;
2321 		dqm->ops.unregister_process = unregister_process;
2322 		dqm->ops.initialize = initialize_nocpsch;
2323 		dqm->ops.uninitialize = uninitialize;
2324 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
2325 		dqm->ops.process_termination = process_termination_nocpsch;
2326 		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
2327 		dqm->ops.restore_process_queues =
2328 			restore_process_queues_nocpsch;
2329 		dqm->ops.get_wave_state = get_wave_state;
2330 		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
2331 		dqm->ops.checkpoint_mqd = checkpoint_mqd;
2332 		break;
2333 	default:
2334 		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
2335 		goto out_free;
2336 	}
2337 
2338 	switch (dev->adev->asic_type) {
2339 	case CHIP_CARRIZO:
2340 		device_queue_manager_init_vi(&dqm->asic_ops);
2341 		break;
2342 
2343 	case CHIP_KAVERI:
2344 		device_queue_manager_init_cik(&dqm->asic_ops);
2345 		break;
2346 
2347 	case CHIP_HAWAII:
2348 		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
2349 		break;
2350 
2351 	case CHIP_TONGA:
2352 	case CHIP_FIJI:
2353 	case CHIP_POLARIS10:
2354 	case CHIP_POLARIS11:
2355 	case CHIP_POLARIS12:
2356 	case CHIP_VEGAM:
2357 		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
2358 		break;
2359 
2360 	default:
2361 		if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
2362 			device_queue_manager_init_v11(&dqm->asic_ops);
2363 		else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
2364 			device_queue_manager_init_v10_navi10(&dqm->asic_ops);
2365 		else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
2366 			device_queue_manager_init_v9(&dqm->asic_ops);
2367 		else {
2368 			WARN(1, "Unexpected ASIC family %u",
2369 			     dev->adev->asic_type);
2370 			goto out_free;
2371 		}
2372 	}
2373 
2374 	if (init_mqd_managers(dqm))
2375 		goto out_free;
2376 
2377 	if (allocate_hiq_sdma_mqd(dqm)) {
2378 		pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
2379 		goto out_free;
2380 	}
2381 
2382 	if (!dqm->ops.initialize(dqm))
2383 		return dqm;
2384 
2385 out_free:
2386 	kfree(dqm);
2387 	return NULL;
2388 }
2389 
2390 static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
2391 				    struct kfd_mem_obj *mqd)
2392 {
2393 	WARN(!mqd, "No hiq sdma mqd trunk to free");
2394 
2395 	amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
2396 }
2397 
2398 void device_queue_manager_uninit(struct device_queue_manager *dqm)
2399 {
2400 	dqm->ops.uninitialize(dqm);
2401 	deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
2402 	kfree(dqm);
2403 }
2404 
2405 int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
2406 {
2407 	struct kfd_process_device *pdd;
2408 	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
2409 	int ret = 0;
2410 
2411 	if (!p)
2412 		return -EINVAL;
2413 	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
2414 	pdd = kfd_get_process_device_data(dqm->dev, p);
2415 	if (pdd)
2416 		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
2417 	kfd_unref_process(p);
2418 
2419 	return ret;
2420 }
2421 
2422 static void kfd_process_hw_exception(struct work_struct *work)
2423 {
2424 	struct device_queue_manager *dqm = container_of(work,
2425 			struct device_queue_manager, hw_exception_work);
2426 	amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
2427 }
2428 
2429 #if defined(CONFIG_DEBUG_FS)
2430 
2431 static void seq_reg_dump(struct seq_file *m,
2432 			 uint32_t (*dump)[2], uint32_t n_regs)
2433 {
2434 	uint32_t i, count;
2435 
2436 	for (i = 0, count = 0; i < n_regs; i++) {
2437 		if (count == 0 ||
2438 		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
2439 			seq_printf(m, "%s    %08x: %08x",
2440 				   i ? "\n" : "",
2441 				   dump[i][0], dump[i][1]);
2442 			count = 7;
2443 		} else {
2444 			seq_printf(m, " %08x", dump[i][1]);
2445 			count--;
2446 		}
2447 	}
2448 
2449 	seq_puts(m, "\n");
2450 }
2451 
2452 int dqm_debugfs_hqds(struct seq_file *m, void *data)
2453 {
2454 	struct device_queue_manager *dqm = data;
2455 	uint32_t (*dump)[2], n_regs;
2456 	int pipe, queue;
2457 	int r = 0;
2458 
2459 	if (!dqm->sched_running) {
2460 		seq_puts(m, " Device is stopped\n");
2461 		return 0;
2462 	}
2463 
2464 	r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
2465 					KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
2466 					&dump, &n_regs);
2467 	if (!r) {
2468 		seq_printf(m, "  HIQ on MEC %d Pipe %d Queue %d\n",
2469 			   KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
2470 			   KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
2471 			   KFD_CIK_HIQ_QUEUE);
2472 		seq_reg_dump(m, dump, n_regs);
2473 
2474 		kfree(dump);
2475 	}
2476 
2477 	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
2478 		int pipe_offset = pipe * get_queues_per_pipe(dqm);
2479 
2480 		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
2481 			if (!test_bit(pipe_offset + queue,
2482 				      dqm->dev->shared_resources.cp_queue_bitmap))
2483 				continue;
2484 
2485 			r = dqm->dev->kfd2kgd->hqd_dump(
2486 				dqm->dev->adev, pipe, queue, &dump, &n_regs);
2487 			if (r)
2488 				break;
2489 
2490 			seq_printf(m, "  CP Pipe %d, Queue %d\n",
2491 				  pipe, queue);
2492 			seq_reg_dump(m, dump, n_regs);
2493 
2494 			kfree(dump);
2495 		}
2496 	}
2497 
2498 	for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
2499 		for (queue = 0;
2500 		     queue < dqm->dev->device_info.num_sdma_queues_per_engine;
2501 		     queue++) {
2502 			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
2503 				dqm->dev->adev, pipe, queue, &dump, &n_regs);
2504 			if (r)
2505 				break;
2506 
2507 			seq_printf(m, "  SDMA Engine %d, RLC %d\n",
2508 				  pipe, queue);
2509 			seq_reg_dump(m, dump, n_regs);
2510 
2511 			kfree(dump);
2512 		}
2513 	}
2514 
2515 	return r;
2516 }
2517 
2518 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
2519 {
2520 	int r = 0;
2521 
2522 	dqm_lock(dqm);
2523 	r = pm_debugfs_hang_hws(&dqm->packet_mgr);
2524 	if (r) {
2525 		dqm_unlock(dqm);
2526 		return r;
2527 	}
2528 	dqm->active_runlist = true;
2529 	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
2530 	dqm_unlock(dqm);
2531 
2532 	return r;
2533 }
2534 
2535 #endif
2536