/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			      dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

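/*
 * Allocate one of the VMIDs reserved for KFD (starting at
 * dev->vm_info.first_vmid_kfd) from dqm->vmid_bitmap and bind it to the
 * process' PASID. Called with dqm->lock held when a process creates its
 * first queue on this device.
 */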
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->vmid_bitmap) - 1;
	dqm->vmid_bitmap &= ~(1 << bit);

	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd));

	return 0;
}

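/*
 * Build a RELEASE_MEM packet in the process' IB buffer and submit it on
 * MEC1 to flush the texture cache. Used on ASICs where the CP does not
 * flush TC when a queue is dequeued.
 */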
static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
				struct qcm_process_device *qpd)
{
	uint32_t len;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);

	return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;

	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->device_info->asic_family == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd));

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	dqm->vmid_bitmap |= (1 << bit);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd)
{
	int retval;

	print_queue(q);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval) {
		if (list_empty(&qpd->queues_list))
			deallocate_vmid(dqm, qpd, q);
		goto out_unlock;
	}

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

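/*
 * Pick a free HQD slot for a compute queue, starting the search at
 * next_pipe_to_allocate so queues are spread round-robin across the
 * enabled pipes of MEC 0. Returns -EBUSY if every enabled pipe is full.
 */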
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_hqd;

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);

	dqm->dev->kfd2kgd->set_scratch_backing_va(
			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);

	if (!q->properties.is_active)
		return 0;

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
			       q->process->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_hqd:
	deallocate_hqd(dqm, q);

	return retval;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	mqd = dqm->ops.get_mqd_manager(dqm,
		get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd)
		return -ENOMEM;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		dqm->queue_count--;

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;

	mutex_lock(&dqm->lock);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	mutex_unlock(&dqm->lock);

	return retval;
}

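/*
 * Update a queue's MQD from q->properties. With HWS the queue is first
 * unmapped (runlist preempted) and then remapped after the update;
 * without HWS the HQD is destroyed and reloaded directly.
 */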
static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	mutex_lock(&dqm->lock);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto out_unlock;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (pdd->qpd.evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		retval = unmap_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active)
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = map_queues_cpsch(dqm);
	else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA))
		retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
				       &q->properties, q->process->mm);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	pr_debug("mqd type %d\n", type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (!mqd)
			pr_err("mqd manager is NULL\n");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

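/*
 * Suspend all active queues of a process by preempting their HQDs.
 * qpd->evicted is a reference count, so nested evictions only bump the
 * count and the queues stay suspended until the matching last restore.
 */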
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd;
	struct kfd_process_device *pdd;
	int retval = 0;

	mutex_lock(&dqm->lock);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* Deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd) { /* should not be here */
			pr_err("Cannot evict queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval)
			goto out;
		dqm->queue_count--;
	}

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	mutex_lock(&dqm->lock);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* Deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		dqm->queue_count--;
	}
	retval = execute_queues_cpsch(dqm,
				qpd->is_debug ?
				KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	mutex_lock(&dqm->lock);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%08x\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->kgd,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd);
	}

	/* Re-activate all evicted queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd) { /* should not be here */
			pr_err("Cannot restore queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
				       q->queue, &q->properties,
				       q->process->mm);
		if (retval)
			goto out;
		dqm->queue_count++;
	}
	qpd->evicted = 0;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	mutex_lock(&dqm->lock);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%08x\n", pd_base);

	/* Re-activate all evicted queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		dqm->queue_count++;
	}
	retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	if (!retval)
		qpd->evicted = 0;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int register_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

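/*
 * Program the ATC PASID/VMID mapping registers. A PASID of 0 clears the
 * mapping; any other PASID is written with the VALID bit set.
 */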
static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0; i < get_pipes_per_mec(dqm); i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

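/*
 * Build the per-pipe bitmaps of HQDs that amdgpu shared with KFD, and
 * initialize the VMID and SDMA allocation bitmaps for the no-HWS path.
 */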
static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0; i < KFD_MQD_TYPE_MAX; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return pm_init(&dqm->packets, dqm);
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	pm_uninit(&dqm->packets);
	return 0;
}

static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->sdma_bitmap) - 1;
	dqm->sdma_bitmap &= ~(1 << bit);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	dqm->sdma_bitmap |= (1 << sdma_queue_id);
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;

	pr_debug("SDMA id is:    %d\n", q->sdma_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_sdma_queue;

	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_sdma_queue:
	deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

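/*
 * Report to the HW scheduler (HWS) which VMIDs and HQDs it may use,
 * based on the resources amdgpu shared with KFD. Only queues on the
 * first MEC are handed to the HWS.
 */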
static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
						res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	return 0;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	mutex_lock(&dqm->lock);
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	mutex_unlock(&dqm->lock);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	mutex_lock(&dqm->lock);
	unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	mutex_unlock(&dqm->lock);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	retval = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		retval = allocate_sdma_queue(dqm, &q->sdma_id);
		if (retval)
			goto out_unlock;
		q->properties.sdma_queue_id =
			q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
		q->properties.sdma_engine_id =
			q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	}
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (!mqd) {
		retval = -ENOMEM;
		goto out_deallocate_sdma_queue;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_sdma_queue;

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);
	return retval;

out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q->sdma_id);
out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

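/*
 * Poll the fence location in GART until the CP writes the expected
 * value, yielding the CPU between polls; give up with -ETIME once
 * timeout_ms has elapsed.
 */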
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int unmap_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
		return 0;

	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval = 0;

	if (!dqm->active_runlist)
		return retval;

	pr_debug("Before destroying queues, sdma queue count is: %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		unmap_sdma_queues(dqm, 0);
		unmap_sdma_queues(dqm, 1);
	}

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			filter, filter_param, false, 0);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* Wait for the CP to write KFD_FENCE_COMPLETED, with a timeout */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval)
		return retval;

	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	retval = unmap_queues_cpsch(dqm, filter, filter_param);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		return retval;
	}

	return map_queues_cpsch(dqm);
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool preempt_all_queues;

	preempt_all_queues = false;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow to destroy a queue
		 * of a currently debugged process
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		dqm->queue_count--;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval == -ETIME)
			qpd->reset_wavefronts = true;
	}

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return retval;

failed:
failed_try_destroy_debugged_queue:

	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval;

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int set_trap_handler(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				uint64_t tba_addr,
				uint64_t tma_addr)
{
	uint64_t *tma;

	if (dqm->dev->cwsr_enabled) {
		/* Jump from CWSR trap handler to user trap */
		tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
		tma[0] = tba_addr;
		tma[1] = tma_addr;
	} else {
		qpd->tba_addr = tba_addr;
		qpd->tma_addr = tma_addr;
	}

	return 0;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q, *next;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;

	mutex_lock(&dqm->lock);

	/* Clear all user mode queues */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		int ret;

		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	mutex_unlock(&dqm->lock);
	return retval;
}


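/*
 * Tear down everything a terminating process still owns on this device:
 * kernel (debug) queues first, then user mode queues, then the process
 * registration itself. Wavefronts are reset if a preemption failed.
 */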
static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q, *next;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;

	retval = 0;

	mutex_lock(&dqm->lock);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		dqm->queue_count--;
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
			dqm->sdma_queue_count--;
			deallocate_sdma_queue(dqm, q->sdma_id);
		}

		if (q->properties.is_active)
			dqm->queue_count--;

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	retval = execute_queues_cpsch(dqm, filter, 0);
	if (retval || qpd->reset_wavefronts) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* lastly, free mqd resources */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd) {
			retval = -ENOMEM;
			goto out;
		}
		list_del(&q->list);
		qpd->queue_count--;
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
	}

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->device_info->asic_family) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->device_info->asic_family);
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}

#if defined(CONFIG_DEBUG_FS)

static void seq_reg_dump(struct seq_file *m,
			 uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
			seq_printf(m, "%s    %08x: %08x",
				   i ? "\n" : "",
				   dump[i][0], dump[i][1]);
			count = 7;
		} else {
			seq_printf(m, " %08x", dump[i][1]);
			count--;
		}
	}

	seq_puts(m, "\n");
}

int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
	struct device_queue_manager *dqm = data;
	uint32_t (*dump)[2], n_regs;
	int pipe, queue;
	int r = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
			if (!test_bit(pipe_offset + queue,
				      dqm->dev->shared_resources.queue_bitmap))
				continue;

			r = dqm->dev->kfd2kgd->hqd_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, "  CP Pipe %d, Queue %d\n",
				  pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	for (pipe = 0; pipe < CIK_SDMA_ENGINE_NUM; pipe++) {
		for (queue = 0; queue < CIK_SDMA_QUEUES_PER_ENGINE; queue++) {
			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, "  SDMA Engine %d, RLC %d\n",
				  pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	return r;
}

#endif