/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "../../radeon/cik_reg.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static bool is_mem_initialized;

static int init_memory(struct device_queue_manager *dqm);
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);

static inline unsigned int get_pipes_num(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.compute_pipe_count;
}

static inline unsigned int get_first_pipe(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);
	return dqm->dev->shared_resources.first_compute_pipe;
}

static inline unsigned int get_pipes_num_cpsch(void)
{
	return PIPE_PER_ME_CP_SCHEDULING;
}

static inline unsigned int
get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd)
{
	uint32_t nybble;

	nybble = (pdd->lds_base >> 60) & 0x0E;

	return nybble;
}

static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{
	unsigned int shared_base;

	shared_base = (pdd->lds_base >> 16) & 0xFF;

	return shared_base;
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble);
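
/*
 * Program the per-process SH_MEM_* shadow values in the qpd: alignment
 * mode, default/APE1 cache policies (MTYPE) and the aperture bases,
 * using the 32-bit or 64-bit address layout as appropriate.
 */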
static void init_process_memory(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd)
{
	struct kfd_process_device *pdd;
	unsigned int temp;

	BUG_ON(!dqm || !qpd);

	pdd = qpd_to_pdd(qpd);

	/* check if sh_mem_config register already configured */
	if (qpd->sh_mem_config == 0) {
		qpd->sh_mem_config =
			ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
			DEFAULT_MTYPE(MTYPE_NONCACHED) |
			APE1_MTYPE(MTYPE_NONCACHED);
		qpd->sh_mem_ape1_limit = 0;
		qpd->sh_mem_ape1_base = 0;
	}

	if (qpd->pqm->process->is_32bit_user_mode) {
		temp = get_sh_mem_bases_32(pdd);
		qpd->sh_mem_bases = SHARED_BASE(temp);
		qpd->sh_mem_config |= PTR32;
	} else {
		temp = get_sh_mem_bases_nybble_64(pdd);
		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
	}

	pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
		qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
}

static void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid,
					qpd->sh_mem_config,
					qpd->sh_mem_ape1_base,
					qpd->sh_mem_ape1_limit,
					qpd->sh_mem_bases);
}

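/*
 * Pick the first free bit in vmid_bitmap, translate it to a hardware
 * VMID (KFD VMIDs start at KFD_VMID_START_OFFSET) and bind it to the
 * process: map the PASID to the VMID and program the SH_MEM registers.
 */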
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);

	/* On Kaveri, KFD VMIDs start at VMID 8 */
	allocated_vmid = bit + KFD_VMID_START_OFFSET;
	pr_debug("kfd: vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	return 0;
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - KFD_VMID_START_OFFSET;

	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				int *allocated_vmid)
{
	int retval;

	BUG_ON(!dqm || !q || !qpd || !allocated_vmid);

	pr_debug("kfd: In func %s\n", __func__);
	print_queue(q);

	mutex_lock(&dqm->lock);

	/* the first queue of a process also allocates its VMID */
	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval != 0) {
			mutex_unlock(&dqm->lock);
			return retval;
		}
	}
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	retval = create_compute_queue_nocpsch(dqm, q, qpd);

	if (retval != 0) {
		if (list_empty(&qpd->queues_list)) {
			deallocate_vmid(dqm, qpd, q);
			*allocated_vmid = 0;
		}
		mutex_unlock(&dqm->lock);
		return retval;
	}

	list_add(&q->list, &qpd->queues_list);
	dqm->queue_count++;

	mutex_unlock(&dqm->lock);
	return 0;
}

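/*
 * Find a free HQD slot. Pipes are scanned round-robin starting at
 * next_pipe_to_allocate so queues spread across all compute pipes.
 */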
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	/* bound the scan by i so we terminate when every pipe is full */
	for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm);
			pipe = ((pipe + 1) % get_pipes_num(dqm)), ++i) {
		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				QUEUES_PER_PIPE);

			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
				__func__, q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval != 0)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		return retval;
	}

	return 0;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !q->mqd || !qpd);

	retval = 0;

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);
	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL) {
		retval = -ENOMEM;
		goto out;
	}

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
				q->pipe, q->queue);

	if (retval != 0)
		goto out;

	deallocate_hqd(dqm, q);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	/* the last queue of a process also releases its VMID */
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
	dqm->queue_count--;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !q->mqd);

	mutex_lock(&dqm->lock);
	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
	if (q->properties.is_active)
		dqm->queue_count++;
	else
		dqm->queue_count--;

	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = execute_queues_cpsch(dqm, false);

	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager_nocpsch(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);

	pr_debug("kfd: In func %s mqd type %d\n", __func__, type);

	/* lazily create the mqd manager on first use */
	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (mqd == NULL)
			pr_err("kfd: mqd manager is NULL\n");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int register_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;

	BUG_ON(!dqm || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	init_process_memory(dqm, qpd);
	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return 0;
}

static int unregister_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	BUG_ON(!dqm || !qpd);

	BUG_ON(!list_empty(&qpd->queues_list));

	pr_debug("kfd: In func %s\n", __func__);

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
						ATC_VMID_PASID_MAPPING_VALID;
	return kfd2kgd->set_pasid_vmid_mapping(dqm->dev->kgd, pasid_mapping,
						vmid);
}

static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
	/* In 64-bit mode, we can only control the top 3 bits of the LDS,
	 * scratch and GPUVM apertures.
	 * The hardware fills in the remaining 59 bits according to the
	 * following pattern:
	 * LDS:		X0000000'00000000 - X0000001'00000000 (4GB)
	 * Scratch:	X0000001'00000000 - X0000002'00000000 (4GB)
	 * GPUVM:	Y0010000'00000000 - Y0020000'00000000 (1TB)
	 *
	 * (where X/Y is the configurable nybble with the low-bit 0)
	 *
	 * LDS and scratch will have the same top nybble programmed in the
	 * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
	 * GPUVM can have a different top nybble programmed in the
	 * top 3 bits of SH_MEM_BASES.SHARED_BASE.
	 * We don't bother to support different top nybbles
	 * for LDS/Scratch and GPUVM.
	 */

	BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
		top_address_nybble == 0);

	return PRIVATE_BASE(top_address_nybble << 12) |
			SHARED_BASE(top_address_nybble << 12);
}

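/*
 * Clear the PASID mapping for every KFD VMID (8..15) and let the KGD
 * side initialize the GPU memory controller state.
 */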
static int init_memory(struct device_queue_manager *dqm)
{
	int i, retval;

	for (i = 8; i < 16; i++)
		set_pasid_vmid_mapping(dqm, 0, i);

	retval = kfd2kgd->init_memory(dqm->dev->kgd);
	if (retval == 0)
		is_mem_initialized = true;
	return retval;
}

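/*
 * Allocate and zero one EOP buffer (HPD) per pipe and program each
 * pipe's EOP registers through the KGD init_pipeline hook.
 */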
static int init_pipelines(struct device_queue_manager *dqm,
			unsigned int pipes_num, unsigned int first_pipe)
{
	void *hpdptr;
	struct mqd_manager *mqd;
	unsigned int i, err, inx;
	uint64_t pipe_hpd_addr;

	BUG_ON(!dqm || !dqm->dev);

	pr_debug("kfd: In func %s\n", __func__);

	/*
	 * Allocate memory for the HPDs. This is hardware-owned per-pipe data.
	 * The driver never accesses this memory after zeroing it.
	 * It doesn't even have to be saved/restored on suspend/resume
	 * because it contains no data when there are no active queues.
	 */

	err = kfd2kgd->allocate_mem(dqm->dev->kgd,
				CIK_HPD_EOP_BYTES * pipes_num,
				PAGE_SIZE,
				KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
				(struct kgd_mem **) &dqm->pipeline_mem);

	if (err) {
		pr_err("kfd: error allocating vidmem for %d pipes\n",
			pipes_num);
		return -ENOMEM;
	}

	hpdptr = dqm->pipeline_mem->cpu_ptr;
	dqm->pipelines_addr = dqm->pipeline_mem->gpu_addr;

	memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL) {
		kfd2kgd->free_mem(dqm->dev->kgd,
				(struct kgd_mem *) dqm->pipeline_mem);
		return -ENOMEM;
	}

	for (i = 0; i < pipes_num; i++) {
		inx = i + first_pipe;
		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
		/* = log2(bytes/4)-1 */
		kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
	}

	return 0;
}


static int init_scheduler(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In %s\n", __func__);

	retval = init_pipelines(dqm, get_pipes_num(dqm), KFD_DQM_FIRST_PIPE);
	if (retval != 0)
		return retval;

	retval = init_memory(dqm);

	return retval;
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int i, retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues) {
		mutex_destroy(&dqm->lock);
		return -ENOMEM;
	}

	for (i = 0; i < get_pipes_num(dqm); i++)
		dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;

	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;

	/* propagate scheduler init failures instead of ignoring them */
	retval = init_scheduler(dqm);
	if (retval != 0) {
		kfree(dqm->allocated_queues);
		mutex_destroy(&dqm->lock);
	}
	return retval;
}

static void uninitialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0; i < KFD_MQD_TYPE_MAX; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd2kgd->free_mem(dqm->dev->kgd,
			(struct kgd_mem *) dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

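/*
 * Tell the HW scheduler which resources it may use: all queues on the
 * CP-scheduling pipes and the VMIDs reserved for KFD.
 */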
static int set_sched_resources(struct device_queue_manager *dqm)
{
	struct scheduling_resources res;
	unsigned int queue_num, queue_mask;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s\n", __func__);

	queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
	queue_mask = (1 << queue_num) - 1;
	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
	res.vmid_mask <<= KFD_VMID_START_OFFSET;
	/* widen before shifting so high queue bits are not truncated */
	res.queue_mask = (uint64_t)queue_mask <<
				(get_first_pipe(dqm) * QUEUES_PER_PIPE);
	res.gws_mask = res.oac_mask = res.gds_heap_base =
						res.gds_heap_size = 0;

	pr_debug("kfd: scheduling resources:\n"
			"      vmid mask: 0x%8X\n"
			"      queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num_cpsch());

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->active_runlist = false;
	retval = init_pipelines(dqm, get_pipes_num(dqm), 0);
	if (retval != 0)
		goto fail_init_pipelines;

	return 0;

fail_init_pipelines:
	mutex_destroy(&dqm->lock);
	return retval;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	int retval;

	BUG_ON(!dqm);

	retval = pm_init(&dqm->packets, dqm);
	if (retval != 0)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval != 0)
		goto fail_set_sched_resources;

	pr_debug("kfd: allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd2kgd->allocate_mem(dqm->dev->kgd,
					sizeof(*dqm->fence_addr),
					32,
					KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
					(struct kgd_mem **) &dqm->fence_mem);

	if (retval != 0)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	list_for_each_entry(node, &dqm->queues, list)
		if (node->qpd->pqm->process && dqm->dev)
			kfd_bind_process_to_device(dqm->dev,
						node->qpd->pqm->process);

	execute_queues_cpsch(dqm, true);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	destroy_queues_cpsch(dqm, true);

	list_for_each_entry(node, &dqm->queues, list) {
		pdd = qpd_to_pdd(node->qpd);
		pdd->bound = false;
	}
	kfd2kgd->free_mem(dqm->dev->kgd,
			(struct kgd_mem *) dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);
	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq);

	pr_debug("kfd: In %s\n", __func__);

	mutex_lock(&dqm->lock);
	destroy_queues_cpsch(dqm, false);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	/* the CP scheduler assigns VMIDs itself */
	if (allocate_vmid)
		*allocate_vmid = 0;

	mutex_lock(&dqm->lock);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm, false);
	}

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

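/*
 * Busy-wait until *fence_addr reaches fence_value or the timeout
 * (in jiffies) expires.
 */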
static int fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned long timeout)
{
	BUG_ON(!fence_addr);
	timeout += jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, timeout)) {
			pr_err("kfd: qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		cpu_relax();
	}

	return 0;
}

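/*
 * Preempt all running compute queues: send an unmap packet, use a fence
 * query to wait (bounded by the preemption timeout) until the CP has
 * processed it, then release the runlist IB.
 */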
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	BUG_ON(!dqm);

	retval = 0;

	if (lock)
		mutex_lock(&dqm->lock);
	if (!dqm->active_runlist)
		goto out;
	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
	if (retval != 0)
		goto out;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* wait for the fence, bounded by the preemption timeout */
	fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

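/*
 * Rebuild and submit the runlist: preempt whatever is running, then, if
 * there are queues and processes to schedule, send a new runlist IB.
 */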
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	BUG_ON(!dqm);

	if (lock)
		mutex_lock(&dqm->lock);

	retval = destroy_queues_cpsch(dqm, false);
	if (retval != 0) {
		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queue preemption\n");
		goto out;
	}

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
		retval = 0;
		goto out;
	}

	if (dqm->active_runlist) {
		retval = 0;
		goto out;
	}

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval != 0) {
		pr_err("kfd: failed to execute runlist\n");
		goto out;
	}
	dqm->active_runlist = true;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !qpd || !q);

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	list_del(&q->list);
	dqm->queue_count--;

	execute_queues_cpsch(dqm, false);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	mutex_unlock(&dqm->lock);

	return 0;

failed:
	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

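/*
 * Update the process's cache policy (MTYPE) and optional APE1 alternate
 * aperture in its SH_MEM shadow state; with no HW scheduler the
 * registers are reprogrammed immediately for the process's VMID.
 */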
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	uint32_t default_mtype;
	uint32_t ape1_mtype;

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base)
			goto out;

		if ((base & APE1_FIXED_BITS_MASK) != 0)
			goto out;

		if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
			goto out;

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	default_mtype = (default_policy == cache_policy_coherent) ?
			MTYPE_NONCACHED :
			MTYPE_CACHED;

	ape1_mtype = (alternate_policy == cache_policy_coherent) ?
			MTYPE_NONCACHED :
			MTYPE_CACHED;

	qpd->sh_mem_config = (qpd->sh_mem_config & PTR32)
			| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
			| DEFAULT_MTYPE(default_mtype)
			| APE1_MTYPE(ape1_mtype);

	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

	mutex_unlock(&dqm->lock);
	return true;

out:
	mutex_unlock(&dqm->lock);
	return false;
}

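/*
 * Build a dqm for the given device, wiring the ops table according to
 * the chosen scheduling policy (HW scheduler vs. direct HQD programming).
 */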
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	BUG_ON(!dev);

	dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
	if (!dqm)
		return NULL;

	dqm->dev = dev;
	switch (sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->create_queue = create_queue_cpsch;
		dqm->initialize = initialize_cpsch;
		dqm->start = start_cpsch;
		dqm->stop = stop_cpsch;
		dqm->destroy_queue = destroy_queue_cpsch;
		dqm->update_queue = update_queue;
		dqm->get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->register_process = register_process_nocpsch;
		dqm->unregister_process = unregister_process_nocpsch;
		dqm->uninitialize = uninitialize_nocpsch;
		dqm->create_kernel_queue = create_kernel_queue_cpsch;
		dqm->destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->set_cache_memory_policy = set_cache_memory_policy;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->start = start_nocpsch;
		dqm->stop = stop_nocpsch;
		dqm->create_queue = create_queue_nocpsch;
		dqm->destroy_queue = destroy_queue_nocpsch;
		dqm->update_queue = update_queue;
		dqm->get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->register_process = register_process_nocpsch;
		dqm->unregister_process = unregister_process_nocpsch;
		dqm->initialize = initialize_nocpsch;
		dqm->uninitialize = uninitialize_nocpsch;
		dqm->set_cache_memory_policy = set_cache_memory_policy;
		break;
	default:
		BUG();
		break;
	}

	if (dqm->initialize(dqm) != 0) {
		kfree(dqm);
		return NULL;
	}

	return dqm;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);

	dqm->uninitialize(dqm);
	kfree(dqm);
}
