/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe) * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			      dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

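/*
 * Grab a free VMID for a process' queues. KFD owns the VMIDs starting at
 * KFD_VMID_START_OFFSET; the bitmap tracks them relative to that offset.
 * The new VMID is immediately mapped to the process' PASID and the
 * SH_MEM registers are programmed for it.
 */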
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);

	/* Kaveri KFD VMIDs start at VMID 8 */
	allocated_vmid = bit + KFD_VMID_START_OFFSET;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	return 0;
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - KFD_VMID_START_OFFSET;

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

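/*
 * Queue creation path used when the CP scheduler (HWS) is disabled.
 * The first queue of a process triggers a VMID allocation; compute
 * queues additionally need an HQD slot, SDMA queues an SDMA queue id.
 * Called with dqm->lock not held.
 */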
static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				int *allocated_vmid)
{
	int retval;

	print_queue(q);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval) {
		if (list_empty(&qpd->queues_list)) {
			deallocate_vmid(dqm, qpd, q);
			*allocated_vmid = 0;
		}
		goto out_unlock;
	}

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

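/*
 * Pick a hardware queue descriptor (HQD) slot for a compute queue.
 * Pipes are scanned round-robin starting at next_pipe_to_allocate so
 * that queues spread horizontally across the enabled pipes of MEC 0.
 */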
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				get_queues_per_pipe(dqm));

			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_hqd;

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);

	dqm->dev->kfd2kgd->set_scratch_backing_va(
			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
			       q->process->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_hqd:
	deallocate_hqd(dqm, q);

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	retval = 0;

	mutex_lock(&dqm->lock);

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		retval = -EINVAL;
		goto out;
	}

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
				q->pipe, q->queue);

	if (retval)
		goto out;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
	if (q->properties.is_active)
		dqm->queue_count--;

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

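/*
 * Re-program an existing queue's MQD after its properties changed and
 * adjust the active-queue count. Under HWS the runlist is re-submitted
 * so the firmware scheduler picks up the new state.
 */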
static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool prev_active = false;

	mutex_lock(&dqm->lock);
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto out_unlock;
	}

	if (q->properties.is_active)
		prev_active = true;

	/*
	 * check active state vs. the previous state
	 * and modify counter accordingly
	 */
	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
	if ((q->properties.is_active) && (!prev_active))
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = execute_queues_cpsch(dqm, false);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager_nocpsch(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	pr_debug("mqd type %d\n", type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (!mqd)
			pr_err("mqd manager is NULL\n");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int register_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	retval = dqm->ops_asic_specific.register_process(dqm, qpd);

	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return retval;
}

static int unregister_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0; i < get_pipes_per_mec(dqm); i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

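/*
 * Build the per-pipe HQD allocation masks from the queue_bitmap that
 * amdgpu reserved for KFD (first MEC only in this path) and reset the
 * VMID and SDMA bitmaps to "all free".
 */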
static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	return 0;
}

static void uninitialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0; i < KFD_MQD_TYPE_MAX; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

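/*
 * SDMA queues are tracked in a simple bitmap; the allocated id encodes
 * both the engine and the queue within that engine (see
 * create_sdma_queue_nocpsch for the split).
 */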
static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
				CIK_SDMA_QUEUES);

	clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;

	pr_debug("SDMA id is:    %d\n", q->sdma_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_sdma_queue;

	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_sdma_queue:
	deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

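/*
 * Tell the HWS firmware which VMIDs and compute queues it may use.
 * Only queues on the first MEC are handed over; the 64-bit queue_mask
 * limits how many HQDs can be described this way.
 */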
static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
	res.vmid_mask <<= KFD_VMID_START_OFFSET;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
						res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	retval = dqm->ops_asic_specific.initialize(dqm);
	if (retval)
		mutex_destroy(&dqm->lock);

	return retval;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	int retval;

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	list_for_each_entry(node, &dqm->queues, list)
		if (node->qpd->pqm->process && dqm->dev)
			kfd_bind_process_to_device(dqm->dev,
						node->qpd->pqm->process);

	execute_queues_cpsch(dqm, true);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	struct kfd_process_device *pdd;

	destroy_queues_cpsch(dqm, true, true);

	list_for_each_entry(node, &dqm->queues, list) {
		pdd = qpd_to_pdd(node->qpd);
		pdd->bound = false;
	}
	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	/* here we actually preempt the DIQ */
	destroy_queues_cpsch(dqm, true, false);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, false);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

static void select_sdma_engine_id(struct queue *q)
{
	static int sdma_id;

	q->sdma_id = sdma_id;
	sdma_id = (sdma_id + 1) % 2;
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
	int retval;
	struct mqd_manager *mqd;

	retval = 0;

	if (allocate_vmid)
		*allocate_vmid = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		select_sdma_engine_id(q);

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (!mqd) {
		retval = -ENOMEM;
		goto out;
	}

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm, false);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

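/*
 * Busy-wait (with schedule()) until the scheduler writes fence_value to
 * fence_addr, or give up after the given timeout. Used to confirm that
 * a preemption request actually completed.
 */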
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned long timeout)
{
	timeout = jiffies + msecs_to_jiffies(timeout);

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, timeout)) {
			pr_err("qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int destroy_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

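/*
 * Preempt everything currently on the HWS runlist: SDMA queues first,
 * then compute queues (statically scheduled ones only when
 * preempt_static_queues is set). Completion is detected through the
 * scheduler fence; on timeout the process' wavefronts are flagged for
 * reset.
 */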
static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock)
{
	int retval;
	enum kfd_preempt_type_filter preempt_type;
	struct kfd_process_device *pdd;

	retval = 0;

	if (lock)
		mutex_lock(&dqm->lock);
	if (!dqm->active_runlist)
		goto out;

	pr_debug("Before destroying queues, sdma queue count is %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		destroy_sdma_queues(dqm, 0);
		destroy_sdma_queues(dqm, 1);
	}

	preempt_type = preempt_static_queues ?
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			preempt_type, 0, false, 0);
	if (retval)
		goto out;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* this wait must time out if the preemption never completes */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval) {
		pdd = kfd_get_process_device_data(dqm->dev,
				kfd_get_process(current));
		pdd->reset_wavefronts = true;
		goto out;
	}
	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

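/*
 * (Re)build the runlist: preempt whatever is currently active, then, if
 * there is at least one queue and one process, send a fresh runlist IB
 * through the packet manager.
 */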
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	if (lock)
		mutex_lock(&dqm->lock);

	retval = destroy_queues_cpsch(dqm, false, false);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queue preemption\n");
		goto out;
	}

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
		retval = 0;
		goto out;
	}

	if (dqm->active_runlist) {
		retval = 0;
		goto out;
	}

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist\n");
		goto out;
	}
	dqm->active_runlist = true;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool preempt_all_queues;

	preempt_all_queues = false;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	if (qpd->is_debug) {
		/*
		 * error: we currently don't allow destroying a queue
		 * of a process that is being debugged
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count--;

	list_del(&q->list);
	if (q->properties.is_active)
		dqm->queue_count--;

	execute_queues_cpsch(dqm, false);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return 0;

failed:
failed_try_destroy_debugged_queue:

	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

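/*
 * Configure the default/alternate cache policy and the APE1 alternate
 * aperture for a process. A zero-sized aperture disables APE1 by
 * programming base > limit. Returns true on success, false if the
 * requested aperture cannot be encoded.
 */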
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval;

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->ops_asic_specific.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

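/*
 * Create a device queue manager and wire up either the HWS (cp
 * scheduling) or the no-HWS ops table, plus the ASIC-specific hooks,
 * based on the sched_policy module parameter and the ASIC family.
 */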
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	dqm->dev = dev;
	switch (sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", sched_policy);
		goto out_free;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->ops_asic_specific);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->ops_asic_specific);
		break;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}