/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

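/*
 * A pipe is usable by KFD if at least one of its queues is set in the
 * queue_bitmap shared with amdgpu. Consistent with how set_sched_resources()
 * decodes the bitmap, a queue's bit index is
 * (mec * num_pipe_per_mec + pipe) * num_queue_per_pipe + queue.
 */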
static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe) * dqm->dev->shared_resources.num_queue_per_pipe;

	/* a queue is available for KFD usage if its bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			      dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

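/*
 * Grab the first free VMID from the bitmap of VMIDs reserved for KFD
 * (hardware VMIDs starting at KFD_VMID_START_OFFSET), then program the
 * PASID<->VMID mapping and the process's SH_MEM aperture registers for it.
 */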
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);

	/* Kaveri KFD VMIDs start at VMID 8 */
	allocated_vmid = bit + KFD_VMID_START_OFFSET;
	pr_debug("kfd: vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	return 0;
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - KFD_VMID_START_OFFSET;

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

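/*
 * Queue creation for the no-HWS path: the first queue of a process
 * allocates a dedicated VMID for that process, then the queue is placed
 * directly on a HQD slot (compute) or on an SDMA engine/queue (SDMA).
 */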
static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				int *allocated_vmid)
{
	int retval;

	BUG_ON(!dqm || !q || !qpd || !allocated_vmid);

	pr_debug("kfd: In func %s\n", __func__);
	print_queue(q);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval != 0) {
			mutex_unlock(&dqm->lock);
			return retval;
		}
	}
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval != 0) {
		if (list_empty(&qpd->queues_list)) {
			deallocate_vmid(dqm, qpd, q);
			*allocated_vmid = 0;
		}
		mutex_unlock(&dqm->lock);
		return retval;
	}

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);
	return 0;
}

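/*
 * Pick a free HQD slot for the queue: starting at next_pipe_to_allocate,
 * scan the enabled pipes in round-robin order and take the first free
 * queue bit of the first pipe that has one.
 */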
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				get_queues_per_pipe(dqm));

			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
				__func__, q->pipe, q->queue);
	/* start the next allocation from the following pipe (round-robin) */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (mqd == NULL)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval != 0)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		return retval;
	}

	pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n",
			q->pipe,
			q->queue);

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
			q->queue, (uint32_t __user *) q->properties.write_ptr);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
		return retval;
	}

	return 0;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !q->mqd || !qpd);

	retval = 0;

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type is invalid (%d)\n",
				q->properties.type);
		retval = -EINVAL;
		goto out;
	}

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
				q->pipe, q->queue);

	if (retval != 0)
		goto out;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
	if (q->properties.is_active)
		dqm->queue_count--;

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

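/*
 * Re-program an existing queue's MQD and keep the active-queue count in
 * sync when the update activates or deactivates the queue. Under HWS the
 * runlist is re-submitted so the change takes effect.
 */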
static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool prev_active = false;

	BUG_ON(!dqm || !q || !q->mqd);

	mutex_lock(&dqm->lock);
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	if (q->properties.is_active)
		prev_active = true;

	/*
	 * Check the new active state against the previous one and adjust
	 * the active queue counter accordingly.
	 */
	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
	if ((q->properties.is_active) && (!prev_active))
		dqm->queue_count++;
	else if ((!q->properties.is_active) && (prev_active))
		dqm->queue_count--;

	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = execute_queues_cpsch(dqm, false);

	mutex_unlock(&dqm->lock);
	return retval;
}

397 
398 static struct mqd_manager *get_mqd_manager_nocpsch(
399 		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
400 {
401 	struct mqd_manager *mqd;
402 
403 	BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);
404 
405 	pr_debug("kfd: In func %s mqd type %d\n", __func__, type);
406 
407 	mqd = dqm->mqds[type];
408 	if (!mqd) {
409 		mqd = mqd_manager_init(type, dqm->dev);
410 		if (mqd == NULL)
411 			pr_err("kfd: mqd manager is NULL");
412 		dqm->mqds[type] = mqd;
413 	}
414 
415 	return mqd;
416 }
417 
418 static int register_process_nocpsch(struct device_queue_manager *dqm,
419 					struct qcm_process_device *qpd)
420 {
421 	struct device_process_node *n;
422 	int retval;
423 
424 	BUG_ON(!dqm || !qpd);
425 
426 	pr_debug("kfd: In func %s\n", __func__);
427 
428 	n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
429 	if (!n)
430 		return -ENOMEM;
431 
432 	n->qpd = qpd;
433 
434 	mutex_lock(&dqm->lock);
435 	list_add(&n->list, &dqm->queues);
436 
437 	retval = dqm->ops_asic_specific.register_process(dqm, qpd);
438 
439 	dqm->processes_count++;
440 
441 	mutex_unlock(&dqm->lock);
442 
443 	return retval;
444 }
445 
446 static int unregister_process_nocpsch(struct device_queue_manager *dqm,
447 					struct qcm_process_device *qpd)
448 {
449 	int retval;
450 	struct device_process_node *cur, *next;
451 
452 	BUG_ON(!dqm || !qpd);
453 
454 	pr_debug("In func %s\n", __func__);
455 
456 	pr_debug("qpd->queues_list is %s\n",
457 			list_empty(&qpd->queues_list) ? "empty" : "not empty");
458 
459 	retval = 0;
460 	mutex_lock(&dqm->lock);
461 
462 	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
463 		if (qpd == cur->qpd) {
464 			list_del(&cur->list);
465 			kfree(cur);
466 			dqm->processes_count--;
467 			goto out;
468 		}
469 	}
470 	/* qpd not found in dqm list */
471 	retval = 1;
472 out:
473 	mutex_unlock(&dqm->lock);
474 	return retval;
475 }
476 
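/*
 * Program the ATC PASID<->VMID mapping through the kfd2kgd interface.
 * A PASID of 0 clears the mapping for the given VMID.
 */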
static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	BUG_ON(dqm == NULL);

	for (i = 0; i < get_pipes_per_mec(dqm); i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

static int init_scheduler(struct device_queue_manager *dqm)
{
	int retval = 0;

	BUG_ON(!dqm);

	pr_debug("kfd: In %s\n", __func__);

	return retval;
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;
	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues) {
		mutex_destroy(&dqm->lock);
		return -ENOMEM;
	}

	for (i = 0; i < get_pipes_per_mec(dqm); i++)
		dqm->allocated_queues[i] = (1 << get_queues_per_pipe(dqm)) - 1;

	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	init_scheduler(dqm);
	return 0;
}

static void uninitialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0; i < KFD_MQD_TYPE_MAX; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

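/*
 * SDMA queue IDs are handed out from a small per-device bitmap
 * (CIK_SDMA_QUEUES bits); the ID is later split into an engine ID and a
 * per-engine queue ID.
 */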
static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
				CIK_SDMA_QUEUES);

	clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval != 0)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;

	pr_debug("kfd: sdma id is:    %d\n", q->sdma_id);
	pr_debug("     sdma queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("     sdma engine id: %d\n", q->properties.sdma_engine_id);

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_sdma_queue(dqm, q->sdma_id);
		return retval;
	}

	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, NULL);
	if (retval != 0) {
		deallocate_sdma_queue(dqm, q->sdma_id);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
		return retval;
	}

	return 0;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

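/*
 * Tell the HW scheduler which resources it owns: the KFD VMID range and
 * a 64-bit mask of the compute queues (first MEC only) that amdgpu left
 * for KFD in queue_bitmap.
 */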
static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s\n", __func__);

	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
	res.vmid_mask <<= KFD_VMID_START_OFFSET;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating.
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
						res.gds_heap_size = 0;

	pr_debug("kfd: scheduling resources:\n"
			"      vmid mask: 0x%8X\n"
			"      queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	retval = dqm->ops_asic_specific.initialize(dqm);
	if (retval != 0)
		goto fail_init_pipelines;

	return 0;

fail_init_pipelines:
	mutex_destroy(&dqm->lock);
	return retval;
}

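/*
 * Bring up the HW scheduler path: initialize the packet manager, hand the
 * scheduler its resources, allocate the preemption fence in GART memory,
 * enable interrupts on the enabled pipes, rebind the registered processes
 * and submit the initial runlist.
 */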
static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	int retval;

	BUG_ON(!dqm);

	retval = pm_init(&dqm->packets, dqm);
	if (retval != 0)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval != 0)
		goto fail_set_sched_resources;

	pr_debug("kfd: allocating fence memory\n");

	/* allocate fence memory on the GART */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval != 0)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	list_for_each_entry(node, &dqm->queues, list)
		if (node->qpd->pqm->process && dqm->dev)
			kfd_bind_process_to_device(dqm->dev,
						node->qpd->pqm->process);

	execute_queues_cpsch(dqm, true);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}


static int stop_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	destroy_queues_cpsch(dqm, true, true);

	list_for_each_entry(node, &dqm->queues, list) {
		pdd = qpd_to_pdd(node->qpd);
		pdd->bound = false;
	}
	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq);

	pr_debug("kfd: In %s\n", __func__);

	mutex_lock(&dqm->lock);
	/* here we actually preempt the DIQ */
	destroy_queues_cpsch(dqm, true, false);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, false);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

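/*
 * Under HWS, SDMA queues are simply assigned round-robin between the two
 * SDMA engines. Note that the counter is static, so it is shared by all
 * device queue manager instances.
 */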
static void select_sdma_engine_id(struct queue *q)
{
	static int sdma_id;

	q->sdma_id = sdma_id;
	sdma_id = (sdma_id + 1) % 2;
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	retval = 0;

	if (allocate_vmid)
		*allocate_vmid = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		select_sdma_engine_id(q);

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm, false);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

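/*
 * Busy-wait (yielding with schedule()) until the scheduler writes
 * fence_value to fence_addr, or until the timeout (in jiffies) expires.
 */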
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned long timeout)
{
	BUG_ON(!fence_addr);
	timeout += jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, timeout)) {
			pr_err("kfd: qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int destroy_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

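/*
 * Preempt the active runlist: unmap the SDMA queues and then the compute
 * queues (all of them, or only the dynamically scheduled ones, depending
 * on preempt_static_queues), then wait for the scheduler to signal the
 * preemption fence. On timeout the process is flagged for a wavefront
 * reset.
 */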
static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock)
{
	int retval;
	enum kfd_preempt_type_filter preempt_type;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	retval = 0;

	if (lock)
		mutex_lock(&dqm->lock);
	if (!dqm->active_runlist)
		goto out;

	pr_debug("kfd: Before destroying queues, sdma queue count is: %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		destroy_sdma_queues(dqm, 0);
		destroy_sdma_queues(dqm, 1);
	}

	preempt_type = preempt_static_queues ?
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			preempt_type, 0, false, 0);
	if (retval != 0)
		goto out;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* the wait below is bounded by the preemption timeout */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval != 0) {
		pdd = kfd_get_process_device_data(dqm->dev,
				kfd_get_process(current));
		pdd->reset_wavefronts = true;
		goto out;
	}
	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

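/*
 * (Re)build the runlist: preempt whatever is currently mapped, then, if
 * there are processes with active queues and no runlist is already
 * active, submit a new runlist through the packet manager.
 */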
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	BUG_ON(!dqm);

	if (lock)
		mutex_lock(&dqm->lock);

	retval = destroy_queues_cpsch(dqm, false, false);
	if (retval != 0) {
		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		goto out;
	}

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
		retval = 0;
		goto out;
	}

	if (dqm->active_runlist) {
		retval = 0;
		goto out;
	}

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval != 0) {
		pr_err("kfd: failed to execute runlist\n");
		goto out;
	}
	dqm->active_runlist = true;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool preempt_all_queues;

	BUG_ON(!dqm || !qpd || !q);

	preempt_all_queues = false;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	if (qpd->is_debug) {
		/*
		 * error: we do not allow destroying a queue of a process
		 * that is currently being debugged
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;
	}

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count--;

	list_del(&q->list);
	if (q->properties.is_active)
		dqm->queue_count--;

	execute_queues_cpsch(dqm, false);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return 0;

failed:
failed_try_destroy_debugged_queue:

	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF
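/*
 * Example with made-up values: base = 0x200000000 and size = 0x100000 give
 * limit = 0x2000FFFFF. Both satisfy APE1_FIXED_BITS_MASK (64K aligned,
 * below the user-mode boundary), and bits 47:16 are programmed as
 * sh_mem_ape1_base = 0x20000 and sh_mem_ape1_limit = 0x2000F.
 */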

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval;

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base)
			goto out;

		if ((base & APE1_FIXED_BITS_MASK) != 0)
			goto out;

		if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
			goto out;

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->ops_asic_specific.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

	mutex_unlock(&dqm->lock);
	return retval;

out:
	mutex_unlock(&dqm->lock);
	return false;
}

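/*
 * Build a device queue manager for the device: pick the ops table that
 * matches the scheduling policy (HWS vs. no-HWS) and the ASIC-specific
 * ops for the chip family, then run the selected initialize() hook.
 */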
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	BUG_ON(!dev);

	pr_debug("kfd: loading device queue manager\n");

	dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
	if (!dqm)
		return NULL;

	dqm->dev = dev;
	switch (sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	default:
		BUG();
		break;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->ops_asic_specific);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->ops_asic_specific);
		break;
	}

	if (dqm->ops.initialize(dqm) != 0) {
		kfree(dqm);
		return NULL;
	}

	return dqm;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);

	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}