1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/ratelimit.h>
25 #include <linux/printk.h>
26 #include <linux/slab.h>
27 #include <linux/list.h>
28 #include <linux/types.h>
29 #include <linux/bitops.h>
30 #include <linux/sched.h>
31 #include "kfd_priv.h"
32 #include "kfd_device_queue_manager.h"
33 #include "kfd_mqd_manager.h"
34 #include "cik_regs.h"
35 #include "kfd_kernel_queue.h"
36 
37 /* Size of the per-pipe EOP queue */
38 #define CIK_HPD_EOP_BYTES_LOG2 11
39 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
40 
41 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
42 					unsigned int pasid, unsigned int vmid);
43 
44 static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
45 					struct queue *q,
46 					struct qcm_process_device *qpd);
47 
48 static int execute_queues_cpsch(struct device_queue_manager *dqm,
49 				enum kfd_unmap_queues_filter filter,
50 				uint32_t filter_param);
51 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
52 				enum kfd_unmap_queues_filter filter,
53 				uint32_t filter_param);
54 
55 static int map_queues_cpsch(struct device_queue_manager *dqm);
56 
57 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
58 					struct queue *q,
59 					struct qcm_process_device *qpd);
60 
61 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
62 				unsigned int sdma_queue_id);
63 
64 static inline
65 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
66 {
67 	if (type == KFD_QUEUE_TYPE_SDMA)
68 		return KFD_MQD_TYPE_SDMA;
69 	return KFD_MQD_TYPE_CP;
70 }
71 
72 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
73 {
74 	int i;
75 	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
76 		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;
77 
78 	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
80 		if (test_bit(pipe_offset + i,
81 			      dqm->dev->shared_resources.queue_bitmap))
82 			return true;
83 	return false;
84 }
85 
86 unsigned int get_queues_num(struct device_queue_manager *dqm)
87 {
88 	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
89 				KGD_MAX_QUEUES);
90 }
91 
92 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
93 {
94 	return dqm->dev->shared_resources.num_queue_per_pipe;
95 }
96 
97 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
98 {
99 	return dqm->dev->shared_resources.num_pipe_per_mec;
100 }
101 
102 void program_sh_mem_settings(struct device_queue_manager *dqm,
103 					struct qcm_process_device *qpd)
104 {
	dqm->dev->kfd2kgd->program_sh_mem_settings(
106 						dqm->dev->kgd, qpd->vmid,
107 						qpd->sh_mem_config,
108 						qpd->sh_mem_ape1_base,
109 						qpd->sh_mem_ape1_limit,
110 						qpd->sh_mem_bases);
111 }
112 
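/*
 * Allocate a VMID from the KFD range for the process, program the
 * PASID-VMID mapping and memory apertures, and point the VMID at the
 * process page table. Used only without the HW scheduler.
 */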
113 static int allocate_vmid(struct device_queue_manager *dqm,
114 			struct qcm_process_device *qpd,
115 			struct queue *q)
116 {
117 	int bit, allocated_vmid;
118 
119 	if (dqm->vmid_bitmap == 0)
120 		return -ENOMEM;
121 
122 	bit = ffs(dqm->vmid_bitmap) - 1;
123 	dqm->vmid_bitmap &= ~(1 << bit);
124 
125 	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
126 	pr_debug("vmid allocation %d\n", allocated_vmid);
127 	qpd->vmid = allocated_vmid;
128 	q->properties.vmid = allocated_vmid;
129 
130 	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
131 	program_sh_mem_settings(dqm, qpd);
132 
133 	/* qpd->page_table_base is set earlier when register_process()
134 	 * is called, i.e. when the first queue is created.
135 	 */
136 	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
137 			qpd->vmid,
138 			qpd->page_table_base);
139 	/* invalidate the VM context after pasid and vmid mapping is set up */
140 	kfd_flush_tlb(qpd_to_pdd(qpd));
141 
142 	return 0;
143 }
144 
145 static void deallocate_vmid(struct device_queue_manager *dqm,
146 				struct qcm_process_device *qpd,
147 				struct queue *q)
148 {
149 	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
150 
151 	kfd_flush_tlb(qpd_to_pdd(qpd));
152 
153 	/* Release the vmid mapping */
154 	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
155 
156 	dqm->vmid_bitmap |= (1 << bit);
157 	qpd->vmid = 0;
158 	q->properties.vmid = 0;
159 }
160 
161 static int create_queue_nocpsch(struct device_queue_manager *dqm,
162 				struct queue *q,
163 				struct qcm_process_device *qpd)
164 {
165 	int retval;
166 
167 	print_queue(q);
168 
169 	mutex_lock(&dqm->lock);
170 
171 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
172 		pr_warn("Can't create new usermode queue because %d queues were already created\n",
173 				dqm->total_queue_count);
174 		retval = -EPERM;
175 		goto out_unlock;
176 	}
177 
178 	if (list_empty(&qpd->queues_list)) {
179 		retval = allocate_vmid(dqm, qpd, q);
180 		if (retval)
181 			goto out_unlock;
182 	}
183 	q->properties.vmid = qpd->vmid;
184 	/*
185 	 * Eviction state logic: we only mark active queues as evicted
186 	 * to avoid the overhead of restoring inactive queues later
187 	 */
188 	if (qpd->evicted)
189 		q->properties.is_evicted = (q->properties.queue_size > 0 &&
190 					    q->properties.queue_percent > 0 &&
191 					    q->properties.queue_address != 0);
192 
193 	q->properties.tba_addr = qpd->tba_addr;
194 	q->properties.tma_addr = qpd->tma_addr;
195 
196 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
197 		retval = create_compute_queue_nocpsch(dqm, q, qpd);
198 	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
199 		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
200 	else
201 		retval = -EINVAL;
202 
203 	if (retval) {
204 		if (list_empty(&qpd->queues_list))
205 			deallocate_vmid(dqm, qpd, q);
206 		goto out_unlock;
207 	}
208 
209 	list_add(&q->list, &qpd->queues_list);
210 	qpd->queue_count++;
211 	if (q->properties.is_active)
212 		dqm->queue_count++;
213 
214 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
215 		dqm->sdma_queue_count++;
216 
217 	/*
218 	 * Unconditionally increment this counter, regardless of the queue's
219 	 * type or whether the queue is active.
220 	 */
221 	dqm->total_queue_count++;
222 	pr_debug("Total of %d queues are accountable so far\n",
223 			dqm->total_queue_count);
224 
225 out_unlock:
226 	mutex_unlock(&dqm->lock);
227 	return retval;
228 }
229 
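/*
 * Find a free HQD slot for the queue, scanning pipes round-robin from
 * dqm->next_pipe_to_allocate so that queues are spread across pipes.
 */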
230 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
231 {
232 	bool set;
233 	int pipe, bit, i;
234 
235 	set = false;
236 
237 	for (pipe = dqm->next_pipe_to_allocate, i = 0;
238 			i < get_pipes_per_mec(dqm);
239 			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
240 
241 		if (!is_pipe_enabled(dqm, 0, pipe))
242 			continue;
243 
244 		if (dqm->allocated_queues[pipe] != 0) {
245 			bit = ffs(dqm->allocated_queues[pipe]) - 1;
246 			dqm->allocated_queues[pipe] &= ~(1 << bit);
247 			q->pipe = pipe;
248 			q->queue = bit;
249 			set = true;
250 			break;
251 		}
252 	}
253 
254 	if (!set)
255 		return -EBUSY;
256 
257 	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
258 	/* horizontal hqd allocation */
259 	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
260 
261 	return 0;
262 }
263 
264 static inline void deallocate_hqd(struct device_queue_manager *dqm,
265 				struct queue *q)
266 {
267 	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
268 }
269 
270 static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
271 					struct queue *q,
272 					struct qcm_process_device *qpd)
273 {
274 	int retval;
275 	struct mqd_manager *mqd;
276 
277 	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
278 	if (!mqd)
279 		return -ENOMEM;
280 
281 	retval = allocate_hqd(dqm, q);
282 	if (retval)
283 		return retval;
284 
285 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
286 				&q->gart_mqd_addr, &q->properties);
287 	if (retval)
288 		goto out_deallocate_hqd;
289 
290 	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
291 			q->pipe, q->queue);
292 
293 	dqm->dev->kfd2kgd->set_scratch_backing_va(
294 			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);
295 
296 	if (!q->properties.is_active)
297 		return 0;
298 
299 	retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
300 			       q->process->mm);
301 	if (retval)
302 		goto out_uninit_mqd;
303 
304 	return 0;
305 
306 out_uninit_mqd:
307 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
308 out_deallocate_hqd:
309 	deallocate_hqd(dqm, q);
310 
311 	return retval;
312 }
313 
/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
317 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
318 				struct qcm_process_device *qpd,
319 				struct queue *q)
320 {
321 	int retval;
322 	struct mqd_manager *mqd;
323 
324 	mqd = dqm->ops.get_mqd_manager(dqm,
325 		get_mqd_type_from_queue_type(q->properties.type));
326 	if (!mqd)
327 		return -ENOMEM;
328 
329 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
330 		deallocate_hqd(dqm, q);
331 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
332 		dqm->sdma_queue_count--;
333 		deallocate_sdma_queue(dqm, q->sdma_id);
334 	} else {
335 		pr_debug("q->properties.type %d is invalid\n",
336 				q->properties.type);
337 		return -EINVAL;
338 	}
339 	dqm->total_queue_count--;
340 
341 	retval = mqd->destroy_mqd(mqd, q->mqd,
342 				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
343 				KFD_UNMAP_LATENCY_MS,
344 				q->pipe, q->queue);
345 	if (retval == -ETIME)
346 		qpd->reset_wavefronts = true;
347 
348 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
349 
350 	list_del(&q->list);
351 	if (list_empty(&qpd->queues_list)) {
352 		if (qpd->reset_wavefronts) {
353 			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
354 					dqm->dev);
355 			/* dbgdev_wave_reset_wavefronts has to be called before
356 			 * deallocate_vmid(), i.e. when vmid is still in use.
357 			 */
358 			dbgdev_wave_reset_wavefronts(dqm->dev,
359 					qpd->pqm->process);
360 			qpd->reset_wavefronts = false;
361 		}
362 
363 		deallocate_vmid(dqm, qpd, q);
364 	}
365 	qpd->queue_count--;
366 	if (q->properties.is_active)
367 		dqm->queue_count--;
368 
369 	return retval;
370 }
371 
372 static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
373 				struct qcm_process_device *qpd,
374 				struct queue *q)
375 {
376 	int retval;
377 
378 	mutex_lock(&dqm->lock);
379 	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
380 	mutex_unlock(&dqm->lock);
381 
382 	return retval;
383 }
384 
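/*
 * Update a queue's MQD with its new properties. With the HW scheduler the
 * queues are unmapped first and a new runlist is submitted afterwards;
 * without it the MQD is destroyed on its HQD and reloaded directly.
 */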
385 static int update_queue(struct device_queue_manager *dqm, struct queue *q)
386 {
387 	int retval;
388 	struct mqd_manager *mqd;
389 	struct kfd_process_device *pdd;
390 	bool prev_active = false;
391 
392 	mutex_lock(&dqm->lock);
393 	pdd = kfd_get_process_device_data(q->device, q->process);
394 	if (!pdd) {
395 		retval = -ENODEV;
396 		goto out_unlock;
397 	}
398 	mqd = dqm->ops.get_mqd_manager(dqm,
399 			get_mqd_type_from_queue_type(q->properties.type));
400 	if (!mqd) {
401 		retval = -ENOMEM;
402 		goto out_unlock;
403 	}
404 	/*
405 	 * Eviction state logic: we only mark active queues as evicted
406 	 * to avoid the overhead of restoring inactive queues later
407 	 */
408 	if (pdd->qpd.evicted)
409 		q->properties.is_evicted = (q->properties.queue_size > 0 &&
410 					    q->properties.queue_percent > 0 &&
411 					    q->properties.queue_address != 0);
412 
413 	/* Save previous activity state for counters */
414 	prev_active = q->properties.is_active;
415 
416 	/* Make sure the queue is unmapped before updating the MQD */
417 	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
418 		retval = unmap_queues_cpsch(dqm,
419 				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
420 		if (retval) {
421 			pr_err("unmap queue failed\n");
422 			goto out_unlock;
423 		}
424 	} else if (prev_active &&
425 		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
426 		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
427 		retval = mqd->destroy_mqd(mqd, q->mqd,
428 				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
429 				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
430 		if (retval) {
431 			pr_err("destroy mqd failed\n");
432 			goto out_unlock;
433 		}
434 	}
435 
436 	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
437 
438 	/*
439 	 * check active state vs. the previous state and modify
440 	 * counter accordingly. map_queues_cpsch uses the
441 	 * dqm->queue_count to determine whether a new runlist must be
442 	 * uploaded.
443 	 */
444 	if (q->properties.is_active && !prev_active)
445 		dqm->queue_count++;
446 	else if (!q->properties.is_active && prev_active)
447 		dqm->queue_count--;
448 
449 	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
450 		retval = map_queues_cpsch(dqm);
451 	else if (q->properties.is_active &&
452 		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
453 		  q->properties.type == KFD_QUEUE_TYPE_SDMA))
454 		retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
455 				       &q->properties, q->process->mm);
456 
457 out_unlock:
458 	mutex_unlock(&dqm->lock);
459 	return retval;
460 }
461 
462 static struct mqd_manager *get_mqd_manager(
463 		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
464 {
465 	struct mqd_manager *mqd;
466 
467 	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
468 		return NULL;
469 
470 	pr_debug("mqd type %d\n", type);
471 
472 	mqd = dqm->mqds[type];
473 	if (!mqd) {
474 		mqd = mqd_manager_init(type, dqm->dev);
475 		if (!mqd)
			pr_err("mqd manager init failed for type %d\n", type);
477 		dqm->mqds[type] = mqd;
478 	}
479 
480 	return mqd;
481 }
482 
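/*
 * Evict all queues of a process (no-HWS mode): mark active queues as
 * evicted and preempt them from their HQDs. qpd->evicted is a reference
 * count, so nested evictions only take effect once.
 */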
483 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
484 					struct qcm_process_device *qpd)
485 {
486 	struct queue *q;
487 	struct mqd_manager *mqd;
488 	struct kfd_process_device *pdd;
489 	int retval = 0;
490 
491 	mutex_lock(&dqm->lock);
492 	if (qpd->evicted++ > 0) /* already evicted, do nothing */
493 		goto out;
494 
495 	pdd = qpd_to_pdd(qpd);
496 	pr_info_ratelimited("Evicting PASID %u queues\n",
497 			    pdd->process->pasid);
498 
	/* Deactivate all active queues on the qpd */
500 	list_for_each_entry(q, &qpd->queues_list, list) {
501 		if (!q->properties.is_active)
502 			continue;
503 		mqd = dqm->ops.get_mqd_manager(dqm,
504 			get_mqd_type_from_queue_type(q->properties.type));
505 		if (!mqd) { /* should not be here */
506 			pr_err("Cannot evict queue, mqd mgr is NULL\n");
507 			retval = -ENOMEM;
508 			goto out;
509 		}
510 		q->properties.is_evicted = true;
511 		q->properties.is_active = false;
512 		retval = mqd->destroy_mqd(mqd, q->mqd,
513 				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
514 				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
515 		if (retval)
516 			goto out;
517 		dqm->queue_count--;
518 	}
519 
520 out:
521 	mutex_unlock(&dqm->lock);
522 	return retval;
523 }
524 
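/*
 * Evict all queues of a process (HWS mode): mark active queues as evicted
 * and rebuild the runlist without them.
 */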
525 static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
526 				      struct qcm_process_device *qpd)
527 {
528 	struct queue *q;
529 	struct kfd_process_device *pdd;
530 	int retval = 0;
531 
532 	mutex_lock(&dqm->lock);
533 	if (qpd->evicted++ > 0) /* already evicted, do nothing */
534 		goto out;
535 
536 	pdd = qpd_to_pdd(qpd);
537 	pr_info_ratelimited("Evicting PASID %u queues\n",
538 			    pdd->process->pasid);
539 
	/* Deactivate all active queues on the qpd */
541 	list_for_each_entry(q, &qpd->queues_list, list) {
542 		if (!q->properties.is_active)
543 			continue;
544 		q->properties.is_evicted = true;
545 		q->properties.is_active = false;
546 		dqm->queue_count--;
547 	}
548 	retval = execute_queues_cpsch(dqm,
549 				qpd->is_debug ?
550 				KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
551 				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
552 
553 out:
554 	mutex_unlock(&dqm->lock);
555 	return retval;
556 }
557 
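/*
 * Restore the evicted queues of a process (no-HWS mode): update the page
 * table base, flush the TLB and reload each evicted MQD onto its HQD.
 */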
558 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
559 					  struct qcm_process_device *qpd)
560 {
561 	struct queue *q;
562 	struct mqd_manager *mqd;
563 	struct kfd_process_device *pdd;
564 	uint32_t pd_base;
565 	int retval = 0;
566 
567 	pdd = qpd_to_pdd(qpd);
568 	/* Retrieve PD base */
569 	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
570 
571 	mutex_lock(&dqm->lock);
572 	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
573 		goto out;
574 	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
575 		qpd->evicted--;
576 		goto out;
577 	}
578 
579 	pr_info_ratelimited("Restoring PASID %u queues\n",
580 			    pdd->process->pasid);
581 
582 	/* Update PD Base in QPD */
583 	qpd->page_table_base = pd_base;
584 	pr_debug("Updated PD address to 0x%08x\n", pd_base);
585 
586 	if (!list_empty(&qpd->queues_list)) {
587 		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
588 				dqm->dev->kgd,
589 				qpd->vmid,
590 				qpd->page_table_base);
591 		kfd_flush_tlb(pdd);
592 	}
593 
	/* Reactivate all evicted queues on the qpd */
595 	list_for_each_entry(q, &qpd->queues_list, list) {
596 		if (!q->properties.is_evicted)
597 			continue;
598 		mqd = dqm->ops.get_mqd_manager(dqm,
599 			get_mqd_type_from_queue_type(q->properties.type));
600 		if (!mqd) { /* should not be here */
601 			pr_err("Cannot restore queue, mqd mgr is NULL\n");
602 			retval = -ENOMEM;
603 			goto out;
604 		}
605 		q->properties.is_evicted = false;
606 		q->properties.is_active = true;
607 		retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
608 				       q->queue, &q->properties,
609 				       q->process->mm);
610 		if (retval)
611 			goto out;
612 		dqm->queue_count++;
613 	}
614 	qpd->evicted = 0;
615 out:
616 	mutex_unlock(&dqm->lock);
617 	return retval;
618 }
619 
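/*
 * Restore the evicted queues of a process (HWS mode): mark them active
 * again and rebuild the runlist.
 */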
620 static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
621 					struct qcm_process_device *qpd)
622 {
623 	struct queue *q;
624 	struct kfd_process_device *pdd;
625 	uint32_t pd_base;
626 	int retval = 0;
627 
628 	pdd = qpd_to_pdd(qpd);
629 	/* Retrieve PD base */
630 	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
631 
632 	mutex_lock(&dqm->lock);
633 	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
634 		goto out;
635 	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
636 		qpd->evicted--;
637 		goto out;
638 	}
639 
640 	pr_info_ratelimited("Restoring PASID %u queues\n",
641 			    pdd->process->pasid);
642 
643 	/* Update PD Base in QPD */
644 	qpd->page_table_base = pd_base;
645 	pr_debug("Updated PD address to 0x%08x\n", pd_base);
646 
	/* Reactivate all evicted queues on the qpd */
648 	list_for_each_entry(q, &qpd->queues_list, list) {
649 		if (!q->properties.is_evicted)
650 			continue;
651 		q->properties.is_evicted = false;
652 		q->properties.is_active = true;
653 		dqm->queue_count++;
654 	}
655 	retval = execute_queues_cpsch(dqm,
656 				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
657 	if (!retval)
658 		qpd->evicted = 0;
659 out:
660 	mutex_unlock(&dqm->lock);
661 	return retval;
662 }
663 
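/*
 * Register a process with the DQM: add its qpd to the device list, cache
 * the page directory base and compute the ASIC-specific sh_mem settings.
 */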
664 static int register_process(struct device_queue_manager *dqm,
665 					struct qcm_process_device *qpd)
666 {
667 	struct device_process_node *n;
668 	struct kfd_process_device *pdd;
669 	uint32_t pd_base;
670 	int retval;
671 
672 	n = kzalloc(sizeof(*n), GFP_KERNEL);
673 	if (!n)
674 		return -ENOMEM;
675 
676 	n->qpd = qpd;
677 
678 	pdd = qpd_to_pdd(qpd);
679 	/* Retrieve PD base */
680 	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
681 
682 	mutex_lock(&dqm->lock);
683 	list_add(&n->list, &dqm->queues);
684 
685 	/* Update PD Base in QPD */
686 	qpd->page_table_base = pd_base;
687 
688 	retval = dqm->asic_ops.update_qpd(dqm, qpd);
689 
690 	dqm->processes_count++;
691 
692 	mutex_unlock(&dqm->lock);
693 
694 	return retval;
695 }
696 
697 static int unregister_process(struct device_queue_manager *dqm,
698 					struct qcm_process_device *qpd)
699 {
700 	int retval;
701 	struct device_process_node *cur, *next;
702 
703 	pr_debug("qpd->queues_list is %s\n",
704 			list_empty(&qpd->queues_list) ? "empty" : "not empty");
705 
706 	retval = 0;
707 	mutex_lock(&dqm->lock);
708 
709 	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
710 		if (qpd == cur->qpd) {
711 			list_del(&cur->list);
712 			kfree(cur);
713 			dqm->processes_count--;
714 			goto out;
715 		}
716 	}
717 	/* qpd not found in dqm list */
718 	retval = 1;
719 out:
720 	mutex_unlock(&dqm->lock);
721 	return retval;
722 }
723 
724 static int
725 set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
726 			unsigned int vmid)
727 {
728 	uint32_t pasid_mapping;
729 
730 	pasid_mapping = (pasid == 0) ? 0 :
731 		(uint32_t)pasid |
732 		ATC_VMID_PASID_MAPPING_VALID;
733 
734 	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
735 						dqm->dev->kgd, pasid_mapping,
736 						vmid);
737 }
738 
739 static void init_interrupts(struct device_queue_manager *dqm)
740 {
741 	unsigned int i;
742 
	for (i = 0; i < get_pipes_per_mec(dqm); i++)
744 		if (is_pipe_enabled(dqm, 0, i))
745 			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
746 }
747 
748 static int initialize_nocpsch(struct device_queue_manager *dqm)
749 {
750 	int pipe, queue;
751 
752 	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
753 
754 	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
755 					sizeof(unsigned int), GFP_KERNEL);
756 	if (!dqm->allocated_queues)
757 		return -ENOMEM;
758 
759 	mutex_init(&dqm->lock);
760 	INIT_LIST_HEAD(&dqm->queues);
761 	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
762 	dqm->sdma_queue_count = 0;
763 
764 	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
765 		int pipe_offset = pipe * get_queues_per_pipe(dqm);
766 
767 		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
768 			if (test_bit(pipe_offset + queue,
769 				     dqm->dev->shared_resources.queue_bitmap))
770 				dqm->allocated_queues[pipe] |= 1 << queue;
771 	}
772 
773 	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
774 	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
775 
776 	return 0;
777 }
778 
779 static void uninitialize(struct device_queue_manager *dqm)
780 {
781 	int i;
782 
783 	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
784 
785 	kfree(dqm->allocated_queues);
	for (i = 0; i < KFD_MQD_TYPE_MAX; i++)
787 		kfree(dqm->mqds[i]);
788 	mutex_destroy(&dqm->lock);
789 	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
790 }
791 
792 static int start_nocpsch(struct device_queue_manager *dqm)
793 {
794 	init_interrupts(dqm);
795 	return 0;
796 }
797 
798 static int stop_nocpsch(struct device_queue_manager *dqm)
799 {
800 	return 0;
801 }
802 
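/* Allocate an SDMA queue id from the SDMA bitmap */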
803 static int allocate_sdma_queue(struct device_queue_manager *dqm,
804 				unsigned int *sdma_queue_id)
805 {
806 	int bit;
807 
808 	if (dqm->sdma_bitmap == 0)
809 		return -ENOMEM;
810 
811 	bit = ffs(dqm->sdma_bitmap) - 1;
812 	dqm->sdma_bitmap &= ~(1 << bit);
813 	*sdma_queue_id = bit;
814 
815 	return 0;
816 }
817 
818 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
819 				unsigned int sdma_queue_id)
820 {
821 	if (sdma_queue_id >= CIK_SDMA_QUEUES)
822 		return;
823 	dqm->sdma_bitmap |= (1 << sdma_queue_id);
824 }
825 
826 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
827 					struct queue *q,
828 					struct qcm_process_device *qpd)
829 {
830 	struct mqd_manager *mqd;
831 	int retval;
832 
833 	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
834 	if (!mqd)
835 		return -ENOMEM;
836 
837 	retval = allocate_sdma_queue(dqm, &q->sdma_id);
838 	if (retval)
839 		return retval;
840 
841 	q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
842 	q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
843 
844 	pr_debug("SDMA id is:    %d\n", q->sdma_id);
845 	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
846 	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
847 
848 	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
849 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
850 				&q->gart_mqd_addr, &q->properties);
851 	if (retval)
852 		goto out_deallocate_sdma_queue;
853 
854 	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
855 	if (retval)
856 		goto out_uninit_mqd;
857 
858 	return 0;
859 
860 out_uninit_mqd:
861 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
862 out_deallocate_sdma_queue:
863 	deallocate_sdma_queue(dqm, q->sdma_id);
864 
865 	return retval;
866 }
867 
868 /*
869  * Device Queue Manager implementation for cp scheduler
870  */
871 
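/*
 * Tell the HW scheduler which VMIDs and queues it may use. Only queues on
 * the first MEC are handed to the scheduler.
 */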
872 static int set_sched_resources(struct device_queue_manager *dqm)
873 {
874 	int i, mec;
875 	struct scheduling_resources res;
876 
877 	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
878 
879 	res.queue_mask = 0;
880 	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
881 		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
882 			/ dqm->dev->shared_resources.num_pipe_per_mec;
883 
884 		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
885 			continue;
886 
887 		/* only acquire queues from the first MEC */
888 		if (mec > 0)
889 			continue;
890 
891 		/* This situation may be hit in the future if a new HW
892 		 * generation exposes more than 64 queues. If so, the
893 		 * definition of res.queue_mask needs updating
894 		 */
895 		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
896 			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
897 			break;
898 		}
899 
900 		res.queue_mask |= (1ull << i);
901 	}
902 	res.gws_mask = res.oac_mask = res.gds_heap_base =
903 						res.gds_heap_size = 0;
904 
905 	pr_debug("Scheduling resources:\n"
906 			"vmid mask: 0x%8X\n"
907 			"queue mask: 0x%8llX\n",
908 			res.vmid_mask, res.queue_mask);
909 
910 	return pm_send_set_resources(&dqm->packets, &res);
911 }
912 
913 static int initialize_cpsch(struct device_queue_manager *dqm)
914 {
915 	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
916 
917 	mutex_init(&dqm->lock);
918 	INIT_LIST_HEAD(&dqm->queues);
919 	dqm->queue_count = dqm->processes_count = 0;
920 	dqm->sdma_queue_count = 0;
921 	dqm->active_runlist = false;
922 	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
923 
924 	return 0;
925 }
926 
927 static int start_cpsch(struct device_queue_manager *dqm)
928 {
	int retval;

	retval = pm_init(&dqm->packets, dqm);
934 	if (retval)
935 		goto fail_packet_manager_init;
936 
937 	retval = set_sched_resources(dqm);
938 	if (retval)
939 		goto fail_set_sched_resources;
940 
941 	pr_debug("Allocating fence memory\n");
942 
943 	/* allocate fence memory on the gart */
944 	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
945 					&dqm->fence_mem);
946 
947 	if (retval)
948 		goto fail_allocate_vidmem;
949 
950 	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
951 	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
952 
953 	init_interrupts(dqm);
954 
955 	mutex_lock(&dqm->lock);
956 	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
957 	mutex_unlock(&dqm->lock);
958 
959 	return 0;
960 fail_allocate_vidmem:
961 fail_set_sched_resources:
962 	pm_uninit(&dqm->packets);
963 fail_packet_manager_init:
964 	return retval;
965 }
966 
967 static int stop_cpsch(struct device_queue_manager *dqm)
968 {
969 	mutex_lock(&dqm->lock);
970 	unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
971 	mutex_unlock(&dqm->lock);
972 
973 	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
974 	pm_uninit(&dqm->packets);
975 
976 	return 0;
977 }
978 
979 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
980 					struct kernel_queue *kq,
981 					struct qcm_process_device *qpd)
982 {
983 	mutex_lock(&dqm->lock);
984 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
985 		pr_warn("Can't create new kernel queue because %d queues were already created\n",
986 				dqm->total_queue_count);
987 		mutex_unlock(&dqm->lock);
988 		return -EPERM;
989 	}
990 
991 	/*
992 	 * Unconditionally increment this counter, regardless of the queue's
993 	 * type or whether the queue is active.
994 	 */
995 	dqm->total_queue_count++;
996 	pr_debug("Total of %d queues are accountable so far\n",
997 			dqm->total_queue_count);
998 
999 	list_add(&kq->list, &qpd->priv_queue_list);
1000 	dqm->queue_count++;
1001 	qpd->is_debug = true;
1002 	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1003 	mutex_unlock(&dqm->lock);
1004 
1005 	return 0;
1006 }
1007 
1008 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
1009 					struct kernel_queue *kq,
1010 					struct qcm_process_device *qpd)
1011 {
1012 	mutex_lock(&dqm->lock);
1013 	list_del(&kq->list);
1014 	dqm->queue_count--;
1015 	qpd->is_debug = false;
1016 	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1017 	/*
1018 	 * Unconditionally decrement this counter, regardless of the queue's
1019 	 * type.
1020 	 */
1021 	dqm->total_queue_count--;
1022 	pr_debug("Total of %d queues are accountable so far\n",
1023 			dqm->total_queue_count);
1024 	mutex_unlock(&dqm->lock);
1025 }
1026 
1027 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1028 			struct qcm_process_device *qpd)
1029 {
	int retval = 0;
	struct mqd_manager *mqd;
1034 
1035 	mutex_lock(&dqm->lock);
1036 
1037 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1038 		pr_warn("Can't create new usermode queue because %d queues were already created\n",
1039 				dqm->total_queue_count);
1040 		retval = -EPERM;
1041 		goto out;
1042 	}
1043 
1044 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1045 		retval = allocate_sdma_queue(dqm, &q->sdma_id);
1046 		if (retval)
1047 			goto out;
1048 		q->properties.sdma_queue_id =
1049 			q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
1050 		q->properties.sdma_engine_id =
1051 			q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
1052 	}
1053 	mqd = dqm->ops.get_mqd_manager(dqm,
1054 			get_mqd_type_from_queue_type(q->properties.type));
1055 
1056 	if (!mqd) {
1057 		retval = -ENOMEM;
1058 		goto out;
1059 	}
1060 	/*
1061 	 * Eviction state logic: we only mark active queues as evicted
1062 	 * to avoid the overhead of restoring inactive queues later
1063 	 */
1064 	if (qpd->evicted)
1065 		q->properties.is_evicted = (q->properties.queue_size > 0 &&
1066 					    q->properties.queue_percent > 0 &&
1067 					    q->properties.queue_address != 0);
1068 
1069 	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1070 
1071 	q->properties.tba_addr = qpd->tba_addr;
1072 	q->properties.tma_addr = qpd->tma_addr;
1073 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
1074 				&q->gart_mqd_addr, &q->properties);
1075 	if (retval)
1076 		goto out;
1077 
1078 	list_add(&q->list, &qpd->queues_list);
1079 	qpd->queue_count++;
1080 	if (q->properties.is_active) {
1081 		dqm->queue_count++;
1082 		retval = execute_queues_cpsch(dqm,
1083 				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1084 	}
1085 
1086 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
1087 		dqm->sdma_queue_count++;
1088 	/*
1089 	 * Unconditionally increment this counter, regardless of the queue's
1090 	 * type or whether the queue is active.
1091 	 */
1092 	dqm->total_queue_count++;
1093 
1094 	pr_debug("Total of %d queues are accountable so far\n",
1095 			dqm->total_queue_count);
1096 
1097 out:
1098 	mutex_unlock(&dqm->lock);
1099 	return retval;
1100 }
1101 
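/*
 * Poll the fence written by the scheduler until it reaches fence_value,
 * rescheduling between reads, or return -ETIME after timeout_ms.
 */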
1102 int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
1103 				unsigned int fence_value,
1104 				unsigned int timeout_ms)
1105 {
1106 	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
1107 
1108 	while (*fence_addr != fence_value) {
1109 		if (time_after(jiffies, end_jiffies)) {
1110 			pr_err("qcm fence wait loop timeout expired\n");
1111 			return -ETIME;
1112 		}
1113 		schedule();
1114 	}
1115 
1116 	return 0;
1117 }
1118 
1119 static int unmap_sdma_queues(struct device_queue_manager *dqm,
1120 				unsigned int sdma_engine)
1121 {
1122 	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
1123 			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
1124 			sdma_engine);
1125 }
1126 
1127 /* dqm->lock mutex has to be locked before calling this function */
1128 static int map_queues_cpsch(struct device_queue_manager *dqm)
1129 {
1130 	int retval;
1131 
1132 	if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
1133 		return 0;
1134 
1135 	if (dqm->active_runlist)
1136 		return 0;
1137 
1138 	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
1139 	if (retval) {
1140 		pr_err("failed to execute runlist\n");
1141 		return retval;
1142 	}
1143 	dqm->active_runlist = true;
1144 
1145 	return retval;
1146 }
1147 
1148 /* dqm->lock mutex has to be locked before calling this function */
1149 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1150 				enum kfd_unmap_queues_filter filter,
1151 				uint32_t filter_param)
1152 {
1153 	int retval = 0;
1154 
1155 	if (!dqm->active_runlist)
1156 		return retval;
1157 
1158 	pr_debug("Before destroying queues, sdma queue count is : %u\n",
1159 		dqm->sdma_queue_count);
1160 
1161 	if (dqm->sdma_queue_count > 0) {
1162 		unmap_sdma_queues(dqm, 0);
1163 		unmap_sdma_queues(dqm, 1);
1164 	}
1165 
1166 	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
1167 			filter, filter_param, false, 0);
1168 	if (retval)
1169 		return retval;
1170 
1171 	*dqm->fence_addr = KFD_FENCE_INIT;
1172 	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
1173 				KFD_FENCE_COMPLETED);
	/* Wait for the unmap to complete, with a timeout */
1175 	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1176 				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
1177 	if (retval)
1178 		return retval;
1179 
1180 	pm_release_ib(&dqm->packets);
1181 	dqm->active_runlist = false;
1182 
1183 	return retval;
1184 }
1185 
1186 /* dqm->lock mutex has to be locked before calling this function */
1187 static int execute_queues_cpsch(struct device_queue_manager *dqm,
1188 				enum kfd_unmap_queues_filter filter,
1189 				uint32_t filter_param)
1190 {
1191 	int retval;
1192 
1193 	retval = unmap_queues_cpsch(dqm, filter, filter_param);
1194 	if (retval) {
1195 		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
1196 		return retval;
1197 	}
1198 
1199 	return map_queues_cpsch(dqm);
1200 }
1201 
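/*
 * Destroy a user mode queue (HWS mode): remove it from the process queue
 * list, rebuild the runlist without it and free its MQD.
 */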
1202 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1203 				struct qcm_process_device *qpd,
1204 				struct queue *q)
1205 {
	int retval = 0;
	struct mqd_manager *mqd;
1213 
1214 	/* remove queue from list to prevent rescheduling after preemption */
1215 	mutex_lock(&dqm->lock);
1216 
1217 	if (qpd->is_debug) {
		/*
		 * Error: destroying a queue that belongs to a process
		 * currently being debugged is not allowed.
		 */
1222 		retval = -EBUSY;
1223 		goto failed_try_destroy_debugged_queue;
1224 
1225 	}
1226 
1227 	mqd = dqm->ops.get_mqd_manager(dqm,
1228 			get_mqd_type_from_queue_type(q->properties.type));
1229 	if (!mqd) {
1230 		retval = -ENOMEM;
1231 		goto failed;
1232 	}
1233 
1234 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1235 		dqm->sdma_queue_count--;
1236 		deallocate_sdma_queue(dqm, q->sdma_id);
1237 	}
1238 
1239 	list_del(&q->list);
1240 	qpd->queue_count--;
1241 	if (q->properties.is_active) {
1242 		dqm->queue_count--;
1243 		retval = execute_queues_cpsch(dqm,
1244 				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1245 		if (retval == -ETIME)
1246 			qpd->reset_wavefronts = true;
1247 	}
1248 
1249 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
1250 
1251 	/*
1252 	 * Unconditionally decrement this counter, regardless of the queue's
1253 	 * type
1254 	 */
1255 	dqm->total_queue_count--;
1256 	pr_debug("Total of %d queues are accountable so far\n",
1257 			dqm->total_queue_count);
1258 
1259 	mutex_unlock(&dqm->lock);
1260 
1261 	return retval;
1262 
1263 failed:
1264 failed_try_destroy_debugged_queue:
1265 
1266 	mutex_unlock(&dqm->lock);
1267 	return retval;
1268 }
1269 
1270 /*
1271  * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
1272  * stay in user mode.
1273  */
1274 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
1275 /* APE1 limit is inclusive and 64K aligned. */
1276 #define APE1_LIMIT_ALIGNMENT 0xFFFF
1277 
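/*
 * Set the default/alternate cache policy and the optional APE1 alternate
 * aperture, converting the aperture to the 64K-aligned SH_MEM_APE1 format.
 */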
1278 static bool set_cache_memory_policy(struct device_queue_manager *dqm,
1279 				   struct qcm_process_device *qpd,
1280 				   enum cache_policy default_policy,
1281 				   enum cache_policy alternate_policy,
1282 				   void __user *alternate_aperture_base,
1283 				   uint64_t alternate_aperture_size)
1284 {
1285 	bool retval;
1286 
1287 	mutex_lock(&dqm->lock);
1288 
1289 	if (alternate_aperture_size == 0) {
1290 		/* base > limit disables APE1 */
1291 		qpd->sh_mem_ape1_base = 1;
1292 		qpd->sh_mem_ape1_limit = 0;
1293 	} else {
1294 		/*
1295 		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
1296 		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
1297 		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
1298 		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
1299 		 * Verify that the base and size parameters can be
1300 		 * represented in this format and convert them.
1301 		 * Additionally restrict APE1 to user-mode addresses.
1302 		 */
1303 
1304 		uint64_t base = (uintptr_t)alternate_aperture_base;
1305 		uint64_t limit = base + alternate_aperture_size - 1;
1306 
1307 		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
1308 		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
1309 			retval = false;
1310 			goto out;
1311 		}
1312 
1313 		qpd->sh_mem_ape1_base = base >> 16;
1314 		qpd->sh_mem_ape1_limit = limit >> 16;
1315 	}
1316 
1317 	retval = dqm->asic_ops.set_cache_memory_policy(
1318 			dqm,
1319 			qpd,
1320 			default_policy,
1321 			alternate_policy,
1322 			alternate_aperture_base,
1323 			alternate_aperture_size);
1324 
1325 	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
1326 		program_sh_mem_settings(dqm, qpd);
1327 
1328 	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
1329 		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
1330 		qpd->sh_mem_ape1_limit);
1331 
1332 out:
1333 	mutex_unlock(&dqm->lock);
1334 	return retval;
1335 }
1336 
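/*
 * Install the user-mode trap handler. With CWSR enabled the TBA/TMA are
 * written into the CWSR TMA page; otherwise they are kept in the qpd and
 * applied to queues when they are created.
 */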
1337 static int set_trap_handler(struct device_queue_manager *dqm,
1338 				struct qcm_process_device *qpd,
1339 				uint64_t tba_addr,
1340 				uint64_t tma_addr)
1341 {
1342 	uint64_t *tma;
1343 
1344 	if (dqm->dev->cwsr_enabled) {
1345 		/* Jump from CWSR trap handler to user trap */
1346 		tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
1347 		tma[0] = tba_addr;
1348 		tma[1] = tma_addr;
1349 	} else {
1350 		qpd->tba_addr = tba_addr;
1351 		qpd->tma_addr = tma_addr;
1352 	}
1353 
1354 	return 0;
1355 }
1356 
1357 static int process_termination_nocpsch(struct device_queue_manager *dqm,
1358 		struct qcm_process_device *qpd)
1359 {
1360 	struct queue *q, *next;
1361 	struct device_process_node *cur, *next_dpn;
1362 	int retval = 0;
1363 
1364 	mutex_lock(&dqm->lock);
1365 
1366 	/* Clear all user mode queues */
1367 	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1368 		int ret;
1369 
1370 		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
1371 		if (ret)
1372 			retval = ret;
1373 	}
1374 
1375 	/* Unregister process */
1376 	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1377 		if (qpd == cur->qpd) {
1378 			list_del(&cur->list);
1379 			kfree(cur);
1380 			dqm->processes_count--;
1381 			break;
1382 		}
1383 	}
1384 
1385 	mutex_unlock(&dqm->lock);
1386 	return retval;
1387 }
1388 
1389 
1390 static int process_termination_cpsch(struct device_queue_manager *dqm,
1391 		struct qcm_process_device *qpd)
1392 {
1393 	int retval;
1394 	struct queue *q, *next;
1395 	struct kernel_queue *kq, *kq_next;
1396 	struct mqd_manager *mqd;
1397 	struct device_process_node *cur, *next_dpn;
1398 	enum kfd_unmap_queues_filter filter =
1399 		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
1400 
1401 	retval = 0;
1402 
1403 	mutex_lock(&dqm->lock);
1404 
1405 	/* Clean all kernel queues */
1406 	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
1407 		list_del(&kq->list);
1408 		dqm->queue_count--;
1409 		qpd->is_debug = false;
1410 		dqm->total_queue_count--;
1411 		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
1412 	}
1413 
	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
			dqm->sdma_queue_count--;
			deallocate_sdma_queue(dqm, q->sdma_id);
		}

		if (q->properties.is_active)
			dqm->queue_count--;

		dqm->total_queue_count--;
	}
1424 
1425 	/* Unregister process */
1426 	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1427 		if (qpd == cur->qpd) {
1428 			list_del(&cur->list);
1429 			kfree(cur);
1430 			dqm->processes_count--;
1431 			break;
1432 		}
1433 	}
1434 
1435 	retval = execute_queues_cpsch(dqm, filter, 0);
1436 	if (retval || qpd->reset_wavefronts) {
1437 		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
1438 		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
1439 		qpd->reset_wavefronts = false;
1440 	}
1441 
1442 	/* lastly, free mqd resources */
1443 	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1444 		mqd = dqm->ops.get_mqd_manager(dqm,
1445 			get_mqd_type_from_queue_type(q->properties.type));
1446 		if (!mqd) {
1447 			retval = -ENOMEM;
1448 			goto out;
1449 		}
1450 		list_del(&q->list);
1451 		qpd->queue_count--;
1452 		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
1453 	}
1454 
1455 out:
1456 	mutex_unlock(&dqm->lock);
1457 	return retval;
1458 }
1459 
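/*
 * Create a device queue manager for the device, selecting the scheduling
 * policy and the ASIC-specific callbacks.
 */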
1460 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1461 {
1462 	struct device_queue_manager *dqm;
1463 
1464 	pr_debug("Loading device queue manager\n");
1465 
1466 	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
1467 	if (!dqm)
1468 		return NULL;
1469 
1470 	switch (dev->device_info->asic_family) {
1471 	/* HWS is not available on Hawaii. */
1472 	case CHIP_HAWAII:
1473 	/* HWS depends on CWSR for timely dequeue. CWSR is not
1474 	 * available on Tonga.
1475 	 *
1476 	 * FIXME: This argument also applies to Kaveri.
1477 	 */
1478 	case CHIP_TONGA:
1479 		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
1480 		break;
1481 	default:
1482 		dqm->sched_policy = sched_policy;
1483 		break;
1484 	}
1485 
1486 	dqm->dev = dev;
1487 	switch (dqm->sched_policy) {
1488 	case KFD_SCHED_POLICY_HWS:
1489 	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
1490 		/* initialize dqm for cp scheduling */
1491 		dqm->ops.create_queue = create_queue_cpsch;
1492 		dqm->ops.initialize = initialize_cpsch;
1493 		dqm->ops.start = start_cpsch;
1494 		dqm->ops.stop = stop_cpsch;
1495 		dqm->ops.destroy_queue = destroy_queue_cpsch;
1496 		dqm->ops.update_queue = update_queue;
1497 		dqm->ops.get_mqd_manager = get_mqd_manager;
1498 		dqm->ops.register_process = register_process;
1499 		dqm->ops.unregister_process = unregister_process;
1500 		dqm->ops.uninitialize = uninitialize;
1501 		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
1502 		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
1503 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1504 		dqm->ops.set_trap_handler = set_trap_handler;
1505 		dqm->ops.process_termination = process_termination_cpsch;
1506 		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
1507 		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
1508 		break;
1509 	case KFD_SCHED_POLICY_NO_HWS:
1510 		/* initialize dqm for no cp scheduling */
1511 		dqm->ops.start = start_nocpsch;
1512 		dqm->ops.stop = stop_nocpsch;
1513 		dqm->ops.create_queue = create_queue_nocpsch;
1514 		dqm->ops.destroy_queue = destroy_queue_nocpsch;
1515 		dqm->ops.update_queue = update_queue;
1516 		dqm->ops.get_mqd_manager = get_mqd_manager;
1517 		dqm->ops.register_process = register_process;
1518 		dqm->ops.unregister_process = unregister_process;
1519 		dqm->ops.initialize = initialize_nocpsch;
1520 		dqm->ops.uninitialize = uninitialize;
1521 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1522 		dqm->ops.set_trap_handler = set_trap_handler;
1523 		dqm->ops.process_termination = process_termination_nocpsch;
1524 		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
1525 		dqm->ops.restore_process_queues =
1526 			restore_process_queues_nocpsch;
1527 		break;
1528 	default:
1529 		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
1530 		goto out_free;
1531 	}
1532 
1533 	switch (dev->device_info->asic_family) {
1534 	case CHIP_CARRIZO:
1535 		device_queue_manager_init_vi(&dqm->asic_ops);
1536 		break;
1537 
1538 	case CHIP_KAVERI:
1539 		device_queue_manager_init_cik(&dqm->asic_ops);
1540 		break;
1541 
1542 	case CHIP_HAWAII:
1543 		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
1544 		break;
1545 
1546 	case CHIP_TONGA:
1547 	case CHIP_FIJI:
1548 	case CHIP_POLARIS10:
1549 	case CHIP_POLARIS11:
1550 		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
1551 		break;
1552 	default:
1553 		WARN(1, "Unexpected ASIC family %u",
1554 		     dev->device_info->asic_family);
1555 		goto out_free;
1556 	}
1557 
1558 	if (!dqm->ops.initialize(dqm))
1559 		return dqm;
1560 
1561 out_free:
1562 	kfree(dqm);
1563 	return NULL;
1564 }
1565 
1566 void device_queue_manager_uninit(struct device_queue_manager *dqm)
1567 {
1568 	dqm->ops.uninitialize(dqm);
1569 	kfree(dqm);
1570 }
1571 
1572 #if defined(CONFIG_DEBUG_FS)
1573 
1574 static void seq_reg_dump(struct seq_file *m,
1575 			 uint32_t (*dump)[2], uint32_t n_regs)
1576 {
1577 	uint32_t i, count;
1578 
1579 	for (i = 0, count = 0; i < n_regs; i++) {
1580 		if (count == 0 ||
1581 		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
1582 			seq_printf(m, "%s    %08x: %08x",
1583 				   i ? "\n" : "",
1584 				   dump[i][0], dump[i][1]);
1585 			count = 7;
1586 		} else {
1587 			seq_printf(m, " %08x", dump[i][1]);
1588 			count--;
1589 		}
1590 	}
1591 
1592 	seq_puts(m, "\n");
1593 }
1594 
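/* Dump the registers of all KFD-usable HQDs and SDMA RLC queues */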
1595 int dqm_debugfs_hqds(struct seq_file *m, void *data)
1596 {
1597 	struct device_queue_manager *dqm = data;
1598 	uint32_t (*dump)[2], n_regs;
1599 	int pipe, queue;
1600 	int r = 0;
1601 
1602 	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
1603 		int pipe_offset = pipe * get_queues_per_pipe(dqm);
1604 
1605 		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
1606 			if (!test_bit(pipe_offset + queue,
1607 				      dqm->dev->shared_resources.queue_bitmap))
1608 				continue;
1609 
1610 			r = dqm->dev->kfd2kgd->hqd_dump(
1611 				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
1612 			if (r)
1613 				break;
1614 
1615 			seq_printf(m, "  CP Pipe %d, Queue %d\n",
1616 				  pipe, queue);
1617 			seq_reg_dump(m, dump, n_regs);
1618 
1619 			kfree(dump);
1620 		}
1621 	}
1622 
1623 	for (pipe = 0; pipe < CIK_SDMA_ENGINE_NUM; pipe++) {
1624 		for (queue = 0; queue < CIK_SDMA_QUEUES_PER_ENGINE; queue++) {
1625 			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
1626 				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
1627 			if (r)
1628 				break;
1629 
1630 			seq_printf(m, "  SDMA Engine %d, RLC %d\n",
1631 				  pipe, queue);
1632 			seq_reg_dump(m, dump, n_regs);
1633 
1634 			kfree(dump);
1635 		}
1636 	}
1637 
1638 	return r;
1639 }
1640 
1641 #endif
1642