1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/slab.h>
25 #include <linux/list.h>
26 #include <linux/types.h>
27 #include <linux/printk.h>
28 #include <linux/bitops.h>
29 #include "kfd_priv.h"
30 #include "kfd_device_queue_manager.h"
31 #include "kfd_mqd_manager.h"
32 #include "cik_regs.h"
33 #include "kfd_kernel_queue.h"
34 #include "../../radeon/cik_reg.h"
35 
/*
 * Size of the per-pipe EOP queue. It is kept as a log2 value so the same
 * constant can size the buffer and be used when programming the pipeline:
 * 1U << 11 == 2048 bytes per pipe.
 */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

/* Set by init_memory() once kfd2kgd->init_memory() has returned success. */
static bool is_mem_initialized;

/* Forward declarations of helpers that are defined further down this file. */
static int init_memory(struct device_queue_manager *dqm);
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
51 
52 
53 static inline unsigned int get_pipes_num(struct device_queue_manager *dqm)
54 {
55 	BUG_ON(!dqm || !dqm->dev);
56 	return dqm->dev->shared_resources.compute_pipe_count;
57 }
58 
59 static inline unsigned int get_first_pipe(struct device_queue_manager *dqm)
60 {
61 	BUG_ON(!dqm);
62 	return dqm->dev->shared_resources.first_compute_pipe;
63 }
64 
65 static inline unsigned int get_pipes_num_cpsch(void)
66 {
67 	return PIPE_PER_ME_CP_SCHEDULING;
68 }
69 
70 static inline unsigned int
71 get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd)
72 {
73 	uint32_t nybble;
74 
75 	nybble = (pdd->lds_base >> 60) & 0x0E;
76 
77 	return nybble;
78 
79 }
80 
81 static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
82 {
83 	unsigned int shared_base;
84 
85 	shared_base = (pdd->lds_base >> 16) & 0xFF;
86 
87 	return shared_base;
88 }
89 
90 static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble);
91 static void init_process_memory(struct device_queue_manager *dqm,
92 				struct qcm_process_device *qpd)
93 {
94 	struct kfd_process_device *pdd;
95 	unsigned int temp;
96 
97 	BUG_ON(!dqm || !qpd);
98 
99 	pdd = qpd_to_pdd(qpd);
100 
101 	/* check if sh_mem_config register already configured */
102 	if (qpd->sh_mem_config == 0) {
103 		qpd->sh_mem_config =
104 			ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
105 			DEFAULT_MTYPE(MTYPE_NONCACHED) |
106 			APE1_MTYPE(MTYPE_NONCACHED);
107 		qpd->sh_mem_ape1_limit = 0;
108 		qpd->sh_mem_ape1_base = 0;
109 	}
110 
111 	if (qpd->pqm->process->is_32bit_user_mode) {
112 		temp = get_sh_mem_bases_32(pdd);
113 		qpd->sh_mem_bases = SHARED_BASE(temp);
114 		qpd->sh_mem_config |= PTR32;
115 	} else {
116 		temp = get_sh_mem_bases_nybble_64(pdd);
117 		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
118 	}
119 
120 	pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
121 		qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
122 }
123 
124 static void program_sh_mem_settings(struct device_queue_manager *dqm,
125 					struct qcm_process_device *qpd)
126 {
127 	return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid,
128 						qpd->sh_mem_config,
129 						qpd->sh_mem_ape1_base,
130 						qpd->sh_mem_ape1_limit,
131 						qpd->sh_mem_bases);
132 }
133 
134 static int allocate_vmid(struct device_queue_manager *dqm,
135 			struct qcm_process_device *qpd,
136 			struct queue *q)
137 {
138 	int bit, allocated_vmid;
139 
140 	if (dqm->vmid_bitmap == 0)
141 		return -ENOMEM;
142 
143 	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
144 	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
145 
146 	/* Kaveri kfd vmid's starts from vmid 8 */
147 	allocated_vmid = bit + KFD_VMID_START_OFFSET;
148 	pr_debug("kfd: vmid allocation %d\n", allocated_vmid);
149 	qpd->vmid = allocated_vmid;
150 	q->properties.vmid = allocated_vmid;
151 
152 	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
153 	program_sh_mem_settings(dqm, qpd);
154 
155 	return 0;
156 }
157 
158 static void deallocate_vmid(struct device_queue_manager *dqm,
159 				struct qcm_process_device *qpd,
160 				struct queue *q)
161 {
162 	int bit = qpd->vmid - KFD_VMID_START_OFFSET;
163 
164 	/* Release the vmid mapping */
165 	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
166 
167 	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
168 	qpd->vmid = 0;
169 	q->properties.vmid = 0;
170 }
171 
172 static int create_queue_nocpsch(struct device_queue_manager *dqm,
173 				struct queue *q,
174 				struct qcm_process_device *qpd,
175 				int *allocated_vmid)
176 {
177 	int retval;
178 
179 	BUG_ON(!dqm || !q || !qpd || !allocated_vmid);
180 
181 	pr_debug("kfd: In func %s\n", __func__);
182 	print_queue(q);
183 
184 	mutex_lock(&dqm->lock);
185 
186 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
187 		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
188 				dqm->total_queue_count);
189 		mutex_unlock(&dqm->lock);
190 		return -EPERM;
191 	}
192 
193 	if (list_empty(&qpd->queues_list)) {
194 		retval = allocate_vmid(dqm, qpd, q);
195 		if (retval != 0) {
196 			mutex_unlock(&dqm->lock);
197 			return retval;
198 		}
199 	}
200 	*allocated_vmid = qpd->vmid;
201 	q->properties.vmid = qpd->vmid;
202 
203 	retval = create_compute_queue_nocpsch(dqm, q, qpd);
204 
205 	if (retval != 0) {
206 		if (list_empty(&qpd->queues_list)) {
207 			deallocate_vmid(dqm, qpd, q);
208 			*allocated_vmid = 0;
209 		}
210 		mutex_unlock(&dqm->lock);
211 		return retval;
212 	}
213 
214 	list_add(&q->list, &qpd->queues_list);
215 	dqm->queue_count++;
216 
217 	/*
218 	 * Unconditionally increment this counter, regardless of the queue's
219 	 * type or whether the queue is active.
220 	 */
221 	dqm->total_queue_count++;
222 	pr_debug("Total of %d queues are accountable so far\n",
223 			dqm->total_queue_count);
224 
225 	mutex_unlock(&dqm->lock);
226 	return 0;
227 }
228 
229 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
230 {
231 	bool set;
232 	int pipe, bit;
233 
234 	set = false;
235 
236 	for (pipe = dqm->next_pipe_to_allocate; pipe < get_pipes_num(dqm);
237 			pipe = (pipe + 1) % get_pipes_num(dqm)) {
238 		if (dqm->allocated_queues[pipe] != 0) {
239 			bit = find_first_bit(
240 				(unsigned long *)&dqm->allocated_queues[pipe],
241 				QUEUES_PER_PIPE);
242 
243 			clear_bit(bit,
244 				(unsigned long *)&dqm->allocated_queues[pipe]);
245 			q->pipe = pipe;
246 			q->queue = bit;
247 			set = true;
248 			break;
249 		}
250 	}
251 
252 	if (set == false)
253 		return -EBUSY;
254 
255 	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
256 				__func__, q->pipe, q->queue);
257 	/* horizontal hqd allocation */
258 	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);
259 
260 	return 0;
261 }
262 
263 static inline void deallocate_hqd(struct device_queue_manager *dqm,
264 				struct queue *q)
265 {
266 	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
267 }
268 
269 static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
270 					struct queue *q,
271 					struct qcm_process_device *qpd)
272 {
273 	int retval;
274 	struct mqd_manager *mqd;
275 
276 	BUG_ON(!dqm || !q || !qpd);
277 
278 	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
279 	if (mqd == NULL)
280 		return -ENOMEM;
281 
282 	retval = allocate_hqd(dqm, q);
283 	if (retval != 0)
284 		return retval;
285 
286 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
287 				&q->gart_mqd_addr, &q->properties);
288 	if (retval != 0) {
289 		deallocate_hqd(dqm, q);
290 		return retval;
291 	}
292 
293 	pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n",
294 			q->pipe,
295 			q->queue);
296 
297 	retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
298 			q->queue, (uint32_t __user *) q->properties.write_ptr);
299 	if (retval != 0) {
300 		deallocate_hqd(dqm, q);
301 		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
302 		return retval;
303 	}
304 
305 	return 0;
306 }
307 
308 static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
309 				struct qcm_process_device *qpd,
310 				struct queue *q)
311 {
312 	int retval;
313 	struct mqd_manager *mqd;
314 
315 	BUG_ON(!dqm || !q || !q->mqd || !qpd);
316 
317 	retval = 0;
318 
319 	pr_debug("kfd: In Func %s\n", __func__);
320 
321 	mutex_lock(&dqm->lock);
322 	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
323 	if (mqd == NULL) {
324 		retval = -ENOMEM;
325 		goto out;
326 	}
327 
328 	retval = mqd->destroy_mqd(mqd, q->mqd,
329 				KFD_PREEMPT_TYPE_WAVEFRONT,
330 				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
331 				q->pipe, q->queue);
332 
333 	if (retval != 0)
334 		goto out;
335 
336 	deallocate_hqd(dqm, q);
337 
338 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
339 
340 	list_del(&q->list);
341 	if (list_empty(&qpd->queues_list))
342 		deallocate_vmid(dqm, qpd, q);
343 	dqm->queue_count--;
344 
345 	/*
346 	 * Unconditionally decrement this counter, regardless of the queue's
347 	 * type
348 	 */
349 	dqm->total_queue_count--;
350 	pr_debug("Total of %d queues are accountable so far\n",
351 			dqm->total_queue_count);
352 
353 out:
354 	mutex_unlock(&dqm->lock);
355 	return retval;
356 }
357 
358 static int update_queue(struct device_queue_manager *dqm, struct queue *q)
359 {
360 	int retval;
361 	struct mqd_manager *mqd;
362 	bool prev_active = false;
363 
364 	BUG_ON(!dqm || !q || !q->mqd);
365 
366 	mutex_lock(&dqm->lock);
367 	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
368 	if (mqd == NULL) {
369 		mutex_unlock(&dqm->lock);
370 		return -ENOMEM;
371 	}
372 
373 	if (q->properties.is_active == true)
374 		prev_active = true;
375 
376 	/*
377 	 *
378 	 * check active state vs. the previous state
379 	 * and modify counter accordingly
380 	 */
381 	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
382 	if ((q->properties.is_active == true) && (prev_active == false))
383 		dqm->queue_count++;
384 	else if ((q->properties.is_active == false) && (prev_active == true))
385 		dqm->queue_count--;
386 
387 	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
388 		retval = execute_queues_cpsch(dqm, false);
389 
390 	mutex_unlock(&dqm->lock);
391 	return retval;
392 }
393 
394 static struct mqd_manager *get_mqd_manager_nocpsch(
395 		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
396 {
397 	struct mqd_manager *mqd;
398 
399 	BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);
400 
401 	pr_debug("kfd: In func %s mqd type %d\n", __func__, type);
402 
403 	mqd = dqm->mqds[type];
404 	if (!mqd) {
405 		mqd = mqd_manager_init(type, dqm->dev);
406 		if (mqd == NULL)
407 			pr_err("kfd: mqd manager is NULL");
408 		dqm->mqds[type] = mqd;
409 	}
410 
411 	return mqd;
412 }
413 
414 static int register_process_nocpsch(struct device_queue_manager *dqm,
415 					struct qcm_process_device *qpd)
416 {
417 	struct device_process_node *n;
418 
419 	BUG_ON(!dqm || !qpd);
420 
421 	pr_debug("kfd: In func %s\n", __func__);
422 
423 	n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
424 	if (!n)
425 		return -ENOMEM;
426 
427 	n->qpd = qpd;
428 
429 	mutex_lock(&dqm->lock);
430 	list_add(&n->list, &dqm->queues);
431 
432 	init_process_memory(dqm, qpd);
433 	dqm->processes_count++;
434 
435 	mutex_unlock(&dqm->lock);
436 
437 	return 0;
438 }
439 
440 static int unregister_process_nocpsch(struct device_queue_manager *dqm,
441 					struct qcm_process_device *qpd)
442 {
443 	int retval;
444 	struct device_process_node *cur, *next;
445 
446 	BUG_ON(!dqm || !qpd);
447 
448 	BUG_ON(!list_empty(&qpd->queues_list));
449 
450 	pr_debug("kfd: In func %s\n", __func__);
451 
452 	retval = 0;
453 	mutex_lock(&dqm->lock);
454 
455 	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
456 		if (qpd == cur->qpd) {
457 			list_del(&cur->list);
458 			kfree(cur);
459 			dqm->processes_count--;
460 			goto out;
461 		}
462 	}
463 	/* qpd not found in dqm list */
464 	retval = 1;
465 out:
466 	mutex_unlock(&dqm->lock);
467 	return retval;
468 }
469 
470 static int
471 set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
472 			unsigned int vmid)
473 {
474 	uint32_t pasid_mapping;
475 
476 	pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
477 						ATC_VMID_PASID_MAPPING_VALID;
478 	return kfd2kgd->set_pasid_vmid_mapping(dqm->dev->kgd, pasid_mapping,
479 						vmid);
480 }
481 
482 static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
483 {
484 	/* In 64-bit mode, we can only control the top 3 bits of the LDS,
485 	 * scratch and GPUVM apertures.
486 	 * The hardware fills in the remaining 59 bits according to the
487 	 * following pattern:
488 	 * LDS:		X0000000'00000000 - X0000001'00000000 (4GB)
489 	 * Scratch:	X0000001'00000000 - X0000002'00000000 (4GB)
490 	 * GPUVM:	Y0010000'00000000 - Y0020000'00000000 (1TB)
491 	 *
492 	 * (where X/Y is the configurable nybble with the low-bit 0)
493 	 *
494 	 * LDS and scratch will have the same top nybble programmed in the
495 	 * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
496 	 * GPUVM can have a different top nybble programmed in the
497 	 * top 3 bits of SH_MEM_BASES.SHARED_BASE.
498 	 * We don't bother to support different top nybbles
499 	 * for LDS/Scratch and GPUVM.
500 	 */
501 
502 	BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
503 		top_address_nybble == 0);
504 
505 	return PRIVATE_BASE(top_address_nybble << 12) |
506 			SHARED_BASE(top_address_nybble << 12);
507 }
508 
509 static int init_memory(struct device_queue_manager *dqm)
510 {
511 	int i, retval;
512 
513 	for (i = 8; i < 16; i++)
514 		set_pasid_vmid_mapping(dqm, 0, i);
515 
516 	retval = kfd2kgd->init_memory(dqm->dev->kgd);
517 	if (retval == 0)
518 		is_mem_initialized = true;
519 	return retval;
520 }
521 
522 
523 static int init_pipelines(struct device_queue_manager *dqm,
524 			unsigned int pipes_num, unsigned int first_pipe)
525 {
526 	void *hpdptr;
527 	struct mqd_manager *mqd;
528 	unsigned int i, err, inx;
529 	uint64_t pipe_hpd_addr;
530 
531 	BUG_ON(!dqm || !dqm->dev);
532 
533 	pr_debug("kfd: In func %s\n", __func__);
534 
535 	/*
536 	 * Allocate memory for the HPDs. This is hardware-owned per-pipe data.
537 	 * The driver never accesses this memory after zeroing it.
538 	 * It doesn't even have to be saved/restored on suspend/resume
539 	 * because it contains no data when there are no active queues.
540 	 */
541 
542 	err = kfd2kgd->allocate_mem(dqm->dev->kgd,
543 				CIK_HPD_EOP_BYTES * pipes_num,
544 				PAGE_SIZE,
545 				KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
546 				(struct kgd_mem **) &dqm->pipeline_mem);
547 
548 	if (err) {
549 		pr_err("kfd: error allocate vidmem num pipes: %d\n",
550 			pipes_num);
551 		return -ENOMEM;
552 	}
553 
554 	hpdptr = dqm->pipeline_mem->cpu_ptr;
555 	dqm->pipelines_addr = dqm->pipeline_mem->gpu_addr;
556 
557 	memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num);
558 
559 	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
560 	if (mqd == NULL) {
561 		kfd2kgd->free_mem(dqm->dev->kgd,
562 				(struct kgd_mem *) dqm->pipeline_mem);
563 		return -ENOMEM;
564 	}
565 
566 	for (i = 0; i < pipes_num; i++) {
567 		inx = i + first_pipe;
568 		/*
569 		 * HPD buffer on GTT is allocated by amdkfd, no need to waste
570 		 * space in GTT for pipelines we don't initialize
571 		 */
572 		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
573 		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
574 		/* = log2(bytes/4)-1 */
575 		kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
576 				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
577 	}
578 
579 	return 0;
580 }
581 
582 
/*
 * Initialise the device's compute pipelines and then its memory.
 * Returns 0 on success or the error of whichever step failed first.
 */
static int init_scheduler(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In %s\n", __func__);

	retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
	if (retval)
		return retval;

	return init_memory(dqm);
}
599 
600 static int initialize_nocpsch(struct device_queue_manager *dqm)
601 {
602 	int i;
603 
604 	BUG_ON(!dqm);
605 
606 	pr_debug("kfd: In func %s num of pipes: %d\n",
607 			__func__, get_pipes_num(dqm));
608 
609 	mutex_init(&dqm->lock);
610 	INIT_LIST_HEAD(&dqm->queues);
611 	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
612 	dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
613 					sizeof(unsigned int), GFP_KERNEL);
614 	if (!dqm->allocated_queues) {
615 		mutex_destroy(&dqm->lock);
616 		return -ENOMEM;
617 	}
618 
619 	for (i = 0; i < get_pipes_num(dqm); i++)
620 		dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;
621 
622 	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
623 
624 	init_scheduler(dqm);
625 	return 0;
626 }
627 
628 static void uninitialize_nocpsch(struct device_queue_manager *dqm)
629 {
630 	int i;
631 
632 	BUG_ON(!dqm);
633 
634 	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
635 
636 	kfree(dqm->allocated_queues);
637 	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
638 		kfree(dqm->mqds[i]);
639 	mutex_destroy(&dqm->lock);
640 	kfd2kgd->free_mem(dqm->dev->kgd,
641 			(struct kgd_mem *) dqm->pipeline_mem);
642 }
643 
/*
 * Start hook for the no-cp-scheduling path; it does nothing and always
 * returns 0.
 */
static int start_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}
648 
/*
 * Stop hook for the no-cp-scheduling path; it does nothing and always
 * returns 0.
 */
static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}
653 
654 /*
655  * Device Queue Manager implementation for cp scheduler
656  */
657 
658 static int set_sched_resources(struct device_queue_manager *dqm)
659 {
660 	struct scheduling_resources res;
661 	unsigned int queue_num, queue_mask;
662 
663 	BUG_ON(!dqm);
664 
665 	pr_debug("kfd: In func %s\n", __func__);
666 
667 	queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
668 	queue_mask = (1 << queue_num) - 1;
669 	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
670 	res.vmid_mask <<= KFD_VMID_START_OFFSET;
671 	res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE);
672 	res.gws_mask = res.oac_mask = res.gds_heap_base =
673 						res.gds_heap_size = 0;
674 
675 	pr_debug("kfd: scheduling resources:\n"
676 			"      vmid mask: 0x%8X\n"
677 			"      queue mask: 0x%8llX\n",
678 			res.vmid_mask, res.queue_mask);
679 
680 	return pm_send_set_resources(&dqm->packets, &res);
681 }
682 
683 static int initialize_cpsch(struct device_queue_manager *dqm)
684 {
685 	int retval;
686 
687 	BUG_ON(!dqm);
688 
689 	pr_debug("kfd: In func %s num of pipes: %d\n",
690 			__func__, get_pipes_num_cpsch());
691 
692 	mutex_init(&dqm->lock);
693 	INIT_LIST_HEAD(&dqm->queues);
694 	dqm->queue_count = dqm->processes_count = 0;
695 	dqm->active_runlist = false;
696 	retval = init_pipelines(dqm, get_pipes_num(dqm), 0);
697 	if (retval != 0)
698 		goto fail_init_pipelines;
699 
700 	return 0;
701 
702 fail_init_pipelines:
703 	mutex_destroy(&dqm->lock);
704 	return retval;
705 }
706 
707 static int start_cpsch(struct device_queue_manager *dqm)
708 {
709 	struct device_process_node *node;
710 	int retval;
711 
712 	BUG_ON(!dqm);
713 
714 	retval = 0;
715 
716 	retval = pm_init(&dqm->packets, dqm);
717 	if (retval != 0)
718 		goto fail_packet_manager_init;
719 
720 	retval = set_sched_resources(dqm);
721 	if (retval != 0)
722 		goto fail_set_sched_resources;
723 
724 	pr_debug("kfd: allocating fence memory\n");
725 
726 	/* allocate fence memory on the gart */
727 	retval = kfd2kgd->allocate_mem(dqm->dev->kgd,
728 					sizeof(*dqm->fence_addr),
729 					32,
730 					KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
731 					(struct kgd_mem **) &dqm->fence_mem);
732 
733 	if (retval != 0)
734 		goto fail_allocate_vidmem;
735 
736 	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
737 	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
738 
739 	list_for_each_entry(node, &dqm->queues, list)
740 		if (node->qpd->pqm->process && dqm->dev)
741 			kfd_bind_process_to_device(dqm->dev,
742 						node->qpd->pqm->process);
743 
744 	execute_queues_cpsch(dqm, true);
745 
746 	return 0;
747 fail_allocate_vidmem:
748 fail_set_sched_resources:
749 	pm_uninit(&dqm->packets);
750 fail_packet_manager_init:
751 	return retval;
752 }
753 
754 static int stop_cpsch(struct device_queue_manager *dqm)
755 {
756 	struct device_process_node *node;
757 	struct kfd_process_device *pdd;
758 
759 	BUG_ON(!dqm);
760 
761 	destroy_queues_cpsch(dqm, true);
762 
763 	list_for_each_entry(node, &dqm->queues, list) {
764 		pdd = qpd_to_pdd(node->qpd);
765 		pdd->bound = false;
766 	}
767 	kfd2kgd->free_mem(dqm->dev->kgd,
768 			(struct kgd_mem *) dqm->fence_mem);
769 	pm_uninit(&dqm->packets);
770 
771 	return 0;
772 }
773 
774 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
775 					struct kernel_queue *kq,
776 					struct qcm_process_device *qpd)
777 {
778 	BUG_ON(!dqm || !kq || !qpd);
779 
780 	pr_debug("kfd: In func %s\n", __func__);
781 
782 	mutex_lock(&dqm->lock);
783 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
784 		pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
785 				dqm->total_queue_count);
786 		mutex_unlock(&dqm->lock);
787 		return -EPERM;
788 	}
789 
790 	/*
791 	 * Unconditionally increment this counter, regardless of the queue's
792 	 * type or whether the queue is active.
793 	 */
794 	dqm->total_queue_count++;
795 	pr_debug("Total of %d queues are accountable so far\n",
796 			dqm->total_queue_count);
797 
798 	list_add(&kq->list, &qpd->priv_queue_list);
799 	dqm->queue_count++;
800 	qpd->is_debug = true;
801 	execute_queues_cpsch(dqm, false);
802 	mutex_unlock(&dqm->lock);
803 
804 	return 0;
805 }
806 
807 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
808 					struct kernel_queue *kq,
809 					struct qcm_process_device *qpd)
810 {
811 	BUG_ON(!dqm || !kq);
812 
813 	pr_debug("kfd: In %s\n", __func__);
814 
815 	mutex_lock(&dqm->lock);
816 	destroy_queues_cpsch(dqm, false);
817 	list_del(&kq->list);
818 	dqm->queue_count--;
819 	qpd->is_debug = false;
820 	execute_queues_cpsch(dqm, false);
821 	/*
822 	 * Unconditionally decrement this counter, regardless of the queue's
823 	 * type.
824 	 */
825 	dqm->total_queue_count++;
826 	pr_debug("Total of %d queues are accountable so far\n",
827 			dqm->total_queue_count);
828 	mutex_unlock(&dqm->lock);
829 }
830 
831 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
832 			struct qcm_process_device *qpd, int *allocate_vmid)
833 {
834 	int retval;
835 	struct mqd_manager *mqd;
836 
837 	BUG_ON(!dqm || !q || !qpd);
838 
839 	retval = 0;
840 
841 	if (allocate_vmid)
842 		*allocate_vmid = 0;
843 
844 	mutex_lock(&dqm->lock);
845 
846 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
847 		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
848 				dqm->total_queue_count);
849 		retval = -EPERM;
850 		goto out;
851 	}
852 
853 	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
854 	if (mqd == NULL) {
855 		mutex_unlock(&dqm->lock);
856 		return -ENOMEM;
857 	}
858 
859 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
860 				&q->gart_mqd_addr, &q->properties);
861 	if (retval != 0)
862 		goto out;
863 
864 	list_add(&q->list, &qpd->queues_list);
865 	if (q->properties.is_active) {
866 		dqm->queue_count++;
867 		retval = execute_queues_cpsch(dqm, false);
868 	}
869 
870 	/*
871 	 * Unconditionally increment this counter, regardless of the queue's
872 	 * type or whether the queue is active.
873 	 */
874 	dqm->total_queue_count++;
875 
876 	pr_debug("Total of %d queues are accountable so far\n",
877 			dqm->total_queue_count);
878 
879 out:
880 	mutex_unlock(&dqm->lock);
881 	return retval;
882 }
883 
884 static int fence_wait_timeout(unsigned int *fence_addr,
885 				unsigned int fence_value,
886 				unsigned long timeout)
887 {
888 	BUG_ON(!fence_addr);
889 	timeout += jiffies;
890 
891 	while (*fence_addr != fence_value) {
892 		if (time_after(jiffies, timeout)) {
893 			pr_err("kfd: qcm fence wait loop timeout expired\n");
894 			return -ETIME;
895 		}
896 		cpu_relax();
897 	}
898 
899 	return 0;
900 }
901 
902 static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
903 {
904 	int retval;
905 
906 	BUG_ON(!dqm);
907 
908 	retval = 0;
909 
910 	if (lock)
911 		mutex_lock(&dqm->lock);
912 	if (dqm->active_runlist == false)
913 		goto out;
914 	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
915 			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
916 	if (retval != 0)
917 		goto out;
918 
919 	*dqm->fence_addr = KFD_FENCE_INIT;
920 	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
921 				KFD_FENCE_COMPLETED);
922 	/* should be timed out */
923 	fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
924 				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
925 	pm_release_ib(&dqm->packets);
926 	dqm->active_runlist = false;
927 
928 out:
929 	if (lock)
930 		mutex_unlock(&dqm->lock);
931 	return retval;
932 }
933 
934 static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
935 {
936 	int retval;
937 
938 	BUG_ON(!dqm);
939 
940 	if (lock)
941 		mutex_lock(&dqm->lock);
942 
943 	retval = destroy_queues_cpsch(dqm, false);
944 	if (retval != 0) {
945 		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
946 		goto out;
947 	}
948 
949 	if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
950 		retval = 0;
951 		goto out;
952 	}
953 
954 	if (dqm->active_runlist) {
955 		retval = 0;
956 		goto out;
957 	}
958 
959 	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
960 	if (retval != 0) {
961 		pr_err("kfd: failed to execute runlist");
962 		goto out;
963 	}
964 	dqm->active_runlist = true;
965 
966 out:
967 	if (lock)
968 		mutex_unlock(&dqm->lock);
969 	return retval;
970 }
971 
972 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
973 				struct qcm_process_device *qpd,
974 				struct queue *q)
975 {
976 	int retval;
977 	struct mqd_manager *mqd;
978 
979 	BUG_ON(!dqm || !qpd || !q);
980 
981 	retval = 0;
982 
983 	/* remove queue from list to prevent rescheduling after preemption */
984 	mutex_lock(&dqm->lock);
985 
986 	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
987 	if (!mqd) {
988 		retval = -ENOMEM;
989 		goto failed;
990 	}
991 
992 	list_del(&q->list);
993 	dqm->queue_count--;
994 
995 	execute_queues_cpsch(dqm, false);
996 
997 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
998 
999 	/*
1000 	 * Unconditionally decrement this counter, regardless of the queue's
1001 	 * type
1002 	 */
1003 	dqm->total_queue_count--;
1004 	pr_debug("Total of %d queues are accountable so far\n",
1005 			dqm->total_queue_count);
1006 
1007 	mutex_unlock(&dqm->lock);
1008 
1009 	return 0;
1010 
1011 failed:
1012 	mutex_unlock(&dqm->lock);
1013 	return retval;
1014 }
1015 
/*
 * Bits of an APE1 base/limit address that must hold a fixed value:
 *  - bits 15:0 must be 0000 (base) or FFFF (limit), as required by HW;
 *  - bits 63:47 must be 0 to stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned, so its low 16 bits are all ones. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF
1023 
1024 static bool set_cache_memory_policy(struct device_queue_manager *dqm,
1025 				   struct qcm_process_device *qpd,
1026 				   enum cache_policy default_policy,
1027 				   enum cache_policy alternate_policy,
1028 				   void __user *alternate_aperture_base,
1029 				   uint64_t alternate_aperture_size)
1030 {
1031 	uint32_t default_mtype;
1032 	uint32_t ape1_mtype;
1033 
1034 	pr_debug("kfd: In func %s\n", __func__);
1035 
1036 	mutex_lock(&dqm->lock);
1037 
1038 	if (alternate_aperture_size == 0) {
1039 		/* base > limit disables APE1 */
1040 		qpd->sh_mem_ape1_base = 1;
1041 		qpd->sh_mem_ape1_limit = 0;
1042 	} else {
1043 		/*
1044 		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
1045 		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
1046 		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
1047 		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
1048 		 * Verify that the base and size parameters can be
1049 		 * represented in this format and convert them.
1050 		 * Additionally restrict APE1 to user-mode addresses.
1051 		 */
1052 
1053 		uint64_t base = (uintptr_t)alternate_aperture_base;
1054 		uint64_t limit = base + alternate_aperture_size - 1;
1055 
1056 		if (limit <= base)
1057 			goto out;
1058 
1059 		if ((base & APE1_FIXED_BITS_MASK) != 0)
1060 			goto out;
1061 
1062 		if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
1063 			goto out;
1064 
1065 		qpd->sh_mem_ape1_base = base >> 16;
1066 		qpd->sh_mem_ape1_limit = limit >> 16;
1067 	}
1068 
1069 	default_mtype = (default_policy == cache_policy_coherent) ?
1070 			MTYPE_NONCACHED :
1071 			MTYPE_CACHED;
1072 
1073 	ape1_mtype = (alternate_policy == cache_policy_coherent) ?
1074 			MTYPE_NONCACHED :
1075 			MTYPE_CACHED;
1076 
1077 	qpd->sh_mem_config = (qpd->sh_mem_config & PTR32)
1078 			| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
1079 			| DEFAULT_MTYPE(default_mtype)
1080 			| APE1_MTYPE(ape1_mtype);
1081 
1082 	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
1083 		program_sh_mem_settings(dqm, qpd);
1084 
1085 	pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
1086 		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
1087 		qpd->sh_mem_ape1_limit);
1088 
1089 	mutex_unlock(&dqm->lock);
1090 	return true;
1091 
1092 out:
1093 	mutex_unlock(&dqm->lock);
1094 	return false;
1095 }
1096 
1097 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1098 {
1099 	struct device_queue_manager *dqm;
1100 
1101 	BUG_ON(!dev);
1102 
1103 	dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
1104 	if (!dqm)
1105 		return NULL;
1106 
1107 	dqm->dev = dev;
1108 	switch (sched_policy) {
1109 	case KFD_SCHED_POLICY_HWS:
1110 	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
1111 		/* initialize dqm for cp scheduling */
1112 		dqm->create_queue = create_queue_cpsch;
1113 		dqm->initialize = initialize_cpsch;
1114 		dqm->start = start_cpsch;
1115 		dqm->stop = stop_cpsch;
1116 		dqm->destroy_queue = destroy_queue_cpsch;
1117 		dqm->update_queue = update_queue;
1118 		dqm->get_mqd_manager = get_mqd_manager_nocpsch;
1119 		dqm->register_process = register_process_nocpsch;
1120 		dqm->unregister_process = unregister_process_nocpsch;
1121 		dqm->uninitialize = uninitialize_nocpsch;
1122 		dqm->create_kernel_queue = create_kernel_queue_cpsch;
1123 		dqm->destroy_kernel_queue = destroy_kernel_queue_cpsch;
1124 		dqm->set_cache_memory_policy = set_cache_memory_policy;
1125 		break;
1126 	case KFD_SCHED_POLICY_NO_HWS:
1127 		/* initialize dqm for no cp scheduling */
1128 		dqm->start = start_nocpsch;
1129 		dqm->stop = stop_nocpsch;
1130 		dqm->create_queue = create_queue_nocpsch;
1131 		dqm->destroy_queue = destroy_queue_nocpsch;
1132 		dqm->update_queue = update_queue;
1133 		dqm->get_mqd_manager = get_mqd_manager_nocpsch;
1134 		dqm->register_process = register_process_nocpsch;
1135 		dqm->unregister_process = unregister_process_nocpsch;
1136 		dqm->initialize = initialize_nocpsch;
1137 		dqm->uninitialize = uninitialize_nocpsch;
1138 		dqm->set_cache_memory_policy = set_cache_memory_policy;
1139 		break;
1140 	default:
1141 		BUG();
1142 		break;
1143 	}
1144 
1145 	if (dqm->initialize(dqm) != 0) {
1146 		kfree(dqm);
1147 		return NULL;
1148 	}
1149 
1150 	return dqm;
1151 }
1152 
1153 void device_queue_manager_uninit(struct device_queue_manager *dqm)
1154 {
1155 	BUG_ON(!dqm);
1156 
1157 	dqm->uninitialize(dqm);
1158 	kfree(dqm);
1159 }
1160 
1161