/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>
#include <linux/file.h>

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"

/*
 * List of struct kfd_process (field kfd_processes).
 * Unique/indexed by mm_struct*
 */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_SRCU(kfd_processes_srcu);

/* For process termination handling */
static struct workqueue_struct *kfd_process_wq;

/* Ordered, single-threaded workqueue for restoring evicted
 * processes. Restoring multiple processes concurrently under memory
 * pressure can lead to processes blocking each other from validating
 * their BOs and result in a live-lock situation where processes
 * remain evicted indefinitely.
 */
static struct workqueue_struct *kfd_restore_wq;

static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread,
					struct file *filep);

static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);


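/* Allocate the workqueues used for process teardown (kfd_process_wq) and
 * for restoring evicted processes (kfd_restore_wq). Returns 0 on success
 * or -ENOMEM if either allocation fails.
 */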
int kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
	if (!kfd_restore_wq)
		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);

	if (!kfd_process_wq || !kfd_restore_wq) {
		kfd_process_destroy_wq();
		return -ENOMEM;
	}

	return 0;
}

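/* Flush and destroy both workqueues. Safe to call even if
 * kfd_process_create_wq() failed part-way through.
 */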
void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
	if (kfd_restore_wq) {
		destroy_workqueue(kfd_restore_wq);
		kfd_restore_wq = NULL;
	}
}

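/* kfd_process_free_gpuvm - Unmap a BO from the process's GPU VM and free it */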
static void kfd_process_free_gpuvm(struct kgd_mem *mem,
			struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd, mem, pdd->vm);
	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, mem);
}

/* kfd_process_alloc_gpuvm - Allocate and map a BO in the process's GPU VM
 *	This function should only be called right after the process
 *	is created, while kfd_processes_mutex is still held, to avoid
 *	concurrency. Because of that exclusiveness, we do not need to
 *	take p->mutex.
 */
static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
				   uint64_t gpu_va, uint32_t size,
				   uint32_t flags, void **kptr)
{
	struct kfd_dev *kdev = pdd->dev;
	struct kgd_mem *mem = NULL;
	int handle;
	int err;

	err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
						 pdd->vm, &mem, NULL, flags);
	if (err)
		goto err_alloc_mem;

	err = kdev->kfd2kgd->map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
	if (err)
		goto err_map_mem;

	err = kdev->kfd2kgd->sync_memory(kdev->kgd, mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Create an obj handle so kfd_process_device_remove_obj_handle
	 * will take care of the BO removal when the process finishes.
	 * We do not need to take p->mutex, because the process is just
	 * created and the ioctls have not had the chance to run.
	 */
	handle = kfd_process_device_create_obj_handle(pdd, mem);

	if (handle < 0) {
		err = handle;
		goto free_gpuvm;
	}

	if (kptr) {
		err = kdev->kfd2kgd->map_gtt_bo_to_kernel(kdev->kgd,
				(struct kgd_mem *)mem, kptr, NULL);
		if (err) {
			pr_debug("Map GTT BO to kernel failed\n");
			goto free_obj_handle;
		}
	}

	return err;

free_obj_handle:
	kfd_process_device_remove_obj_handle(pdd, handle);
free_gpuvm:
sync_memory_failed:
	kfd_process_free_gpuvm(mem, pdd);
	return err;

err_map_mem:
	kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem);
err_alloc_mem:
	*kptr = NULL;
	return err;
}

/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
 *	process for IB usage. The reserved memory is used by KFD to
 *	submit IBs to AMDGPU from the kernel. If the memory is reserved
 *	successfully, ib_kaddr will hold the CPU/kernel address.
 *	Check ib_kaddr before accessing the memory.
 */
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
			 ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
			 ALLOC_MEM_FLAGS_WRITABLE |
			 ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (qpd->ib_kaddr || !qpd->ib_base)
		return 0;

	/* ib_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
				      &kaddr);
	if (ret)
		return ret;

	qpd->ib_kaddr = kaddr;

	return 0;
}

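/* kfd_create_process - Create a kfd_process for the current task, or
 * return the existing one if a prior open of /dev/kfd already created it.
 * Returns an ERR_PTR on failure.
 */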
struct kfd_process *kfd_create_process(struct file *filep)
{
	struct kfd_process *process;
	struct task_struct *thread = current;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/*
	 * Take the KFD processes mutex before starting process creation
	 * so that two threads of the same process cannot create two
	 * kfd_process structures.
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process)
		pr_debug("Process already found\n");
	else
		process = create_process(thread, filep);

	mutex_unlock(&kfd_processes_mutex);

	return process;
}

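/* kfd_get_process - Look up the kfd_process for the given task without
 * creating one. Returns an ERR_PTR if the task has no mm or no
 * kfd_process exists yet. Does not take a reference.
 */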
struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);
	if (!process)
		return ERR_PTR(-EINVAL);

	return process;
}

static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
					kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

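/* kfd_unref_process - Drop a process reference. The last reference
 * schedules the final release on kfd_process_wq.
 */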
void kfd_unref_process(struct kfd_process *p)
{
	kref_put(&p->ref, kfd_process_ref_release);
}

static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
	struct kfd_process *p = pdd->process;
	void *mem;
	int id;

	/*
	 * Remove all handles from the IDR and release the
	 * corresponding local memory objects.
	 */
	idr_for_each_entry(&pdd->alloc_idr, mem, id) {
		struct kfd_process_device *peer_pdd;

		list_for_each_entry(peer_pdd, &p->per_device_data,
				    per_device_list) {
			if (!peer_pdd->vm)
				continue;
			peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu(
				peer_pdd->dev->kgd, mem, peer_pdd->vm);
		}

		pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd, mem);
		kfd_process_device_remove_obj_handle(pdd, id);
	}
}

static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		kfd_process_device_free_bos(pdd);
}

static void kfd_process_destroy_pdds(struct kfd_process *p)
{
	struct kfd_process_device *pdd, *temp;

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
				 per_device_list) {
		pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
				pdd->dev->id, p->pasid);

		if (pdd->drm_file) {
			pdd->dev->kfd2kgd->release_process_vm(
					pdd->dev->kgd, pdd->vm);
			fput(pdd->drm_file);
		} else if (pdd->vm)
			pdd->dev->kfd2kgd->destroy_process_vm(
				pdd->dev->kgd, pdd->vm);

		list_del(&pdd->per_device_list);

		if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
				get_order(KFD_CWSR_TBA_TMA_SIZE));

		kfree(pdd->qpd.doorbell_bitmap);
		idr_destroy(&pdd->alloc_idr);

		kfree(pdd);
	}
}

/* No process locking is needed in this function, because the process
 * is no longer findable. We must assume that no other thread is
 * still using it, otherwise we could not safely free the process
 * structure in the end.
 */
static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process *p = container_of(work, struct kfd_process,
					     release_work);

	kfd_iommu_unbind_process(p);

	kfd_process_free_outstanding_kfd_bos(p);

	kfd_process_destroy_pdds(p);
	dma_fence_put(p->ef);

	kfd_event_free_process(p);

	kfd_pasid_free(p->pasid);
	kfd_free_process_doorbells(p);

	mutex_destroy(&p->mutex);

	put_task_struct(p->lead_thread);

	kfree(p);
}

static void kfd_process_ref_release(struct kref *ref)
{
	struct kfd_process *p = container_of(ref, struct kfd_process, ref);

	INIT_WORK(&p->release_work, kfd_process_wq_release);
	queue_work(kfd_process_wq, &p->release_work);
}

static void kfd_process_destroy_delayed(struct rcu_head *rcu)
{
	struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);

	kfd_unref_process(p);
}

static void kfd_process_notifier_release(struct mmu_notifier *mn,
					struct mm_struct *mm)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;

	/*
	 * The kfd_process structure cannot be freed because the
	 * mmu_notifier SRCU is read-locked.
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	if (WARN_ON(p->mm != mm))
		return;

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	cancel_delayed_work_sync(&p->eviction_work);
	cancel_delayed_work_sync(&p->restore_work);

	mutex_lock(&p->mutex);

	/* Iterate over all process device data structures. If a pdd is
	 * in debug mode, force unregistration first so that the queues
	 * can be destroyed afterwards.
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;

		mutex_lock(kfd_get_dbgmgr_mutex());
		if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
			if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
				kfd_dbgmgr_destroy(dev->dbgmgr);
				dev->dbgmgr = NULL;
			}
		}
		mutex_unlock(kfd_get_dbgmgr_mutex());
	}

	kfd_process_dequeue_from_all_devices(p);
	pqm_uninit(&p->pqm);

	/* Indicate to other users that MM is no longer valid */
	p->mm = NULL;

	mutex_unlock(&p->mutex);

	mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
	mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
};

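/* Initialize CWSR (compute wave save/restore) trap handler memory for
 * devices without a dGPU CWSR aperture (cwsr_base unset): map the
 * reserved memory through the KFD device file and copy the trap handler
 * ISA into it.
 */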
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
	unsigned long offset;
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;
		struct qcm_process_device *qpd = &pdd->qpd;

		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
			continue;

		offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
			<< PAGE_SHIFT;
		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
			MAP_SHARED, offset);

		if (IS_ERR_VALUE(qpd->tba_addr)) {
			int err = qpd->tba_addr;

			pr_err("Failed to set tba address, error %d\n", err);
			qpd->tba_addr = 0;
			qpd->cwsr_kaddr = NULL;
			return err;
		}

		memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
		pr_debug("set tba: 0x%llx, tma: 0x%llx, cwsr_kaddr: %p for pqm.\n",
			qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
	}

	return 0;
}

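/* For dGPUs, cwsr_base is set in the process aperture. Allocate and map
 * the CWSR trap handler buffer at that GPU VA and copy the trap handler
 * ISA into it.
 */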
static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
		ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
		return 0;

	/* cwsr_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
				      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
	if (ret)
		return ret;

	qpd->cwsr_kaddr = kaddr;
	qpd->tba_addr = qpd->cwsr_base;

	memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
	pr_debug("set tba: 0x%llx, tma: 0x%llx, cwsr_kaddr: %p for pqm.\n",
		 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);

	return 0;
}

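/* create_process - Allocate and initialize a new kfd_process for the
 * given task: PASID, doorbells, MMU notifier, event and queue managers,
 * apertures and CWSR. Called with kfd_processes_mutex held.
 */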
static struct kfd_process *create_process(const struct task_struct *thread,
					struct file *filep)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);

	if (!process)
		goto err_alloc_process;

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	if (kfd_alloc_process_doorbells(process) < 0)
		goto err_alloc_doorbells;

	kref_init(&process->ref);

	mutex_init(&process->mutex);

	process->mm = thread->mm;

	/* register notifier */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_mmu_notifier;

	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
			(uintptr_t)process->mm);

	process->lead_thread = thread->group_leader;
	get_task_struct(process->lead_thread);

	INIT_LIST_HEAD(&process->per_device_data);

	kfd_event_init_process(process);

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	/* Init process apertures */
	process->is_32bit_user_mode = in_compat_syscall();
	err = kfd_init_apertures(process);
	if (err != 0)
		goto err_init_apertures;

	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
	process->last_restore_timestamp = get_jiffies_64();

	err = kfd_process_init_cwsr_apu(process, filep);
	if (err)
		goto err_init_cwsr;

	return process;

err_init_cwsr:
	kfd_process_free_outstanding_kfd_bos(process);
	kfd_process_destroy_pdds(process);
err_init_apertures:
	pqm_uninit(&process->pqm);
err_process_pqm_init:
	hash_del_rcu(&process->kfd_processes);
	synchronize_rcu();
	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
err_mmu_notifier:
	mutex_destroy(&process->mutex);
	kfd_free_process_doorbells(process);
err_alloc_doorbells:
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}

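/* init_doorbell_bitmap - On SOC15 ASICs, allocate the per-process
 * doorbell bitmap and mark doorbells matching the reserved mask/value
 * from shared_resources as used, so they are never handed out to user
 * queues.
 */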
static int init_doorbell_bitmap(struct qcm_process_device *qpd,
			struct kfd_dev *dev)
{
	unsigned int i;

	if (!KFD_IS_SOC15(dev->device_info->asic_family))
		return 0;

	qpd->doorbell_bitmap =
		kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
				     BITS_PER_BYTE), GFP_KERNEL);
	if (!qpd->doorbell_bitmap)
		return -ENOMEM;

	/* Mask out any reserved doorbells */
	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++)
		if ((dev->shared_resources.reserved_doorbell_mask & i) ==
		    dev->shared_resources.reserved_doorbell_val) {
			set_bit(i, qpd->doorbell_bitmap);
			pr_debug("reserved doorbell 0x%03x\n", i);
		}

	return 0;
}

struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	return NULL;
}

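/* kfd_create_process_device_data - Allocate and initialize the
 * per-device data (pdd) for a device/process pair and add it to the
 * process's per_device_data list.
 */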
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (!pdd)
		return NULL;

	if (init_doorbell_bitmap(&pdd->qpd, dev)) {
		pr_err("Failed to init doorbell for process\n");
		kfree(pdd);
		return NULL;
	}

	pdd->dev = dev;
	INIT_LIST_HEAD(&pdd->qpd.queues_list);
	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
	pdd->qpd.dqm = dev->dqm;
	pdd->qpd.pqm = &p->pqm;
	pdd->qpd.evicted = 0;
	pdd->process = p;
	pdd->bound = PDD_UNBOUND;
	pdd->already_dequeued = false;
	list_add(&pdd->per_device_list, &p->per_device_data);

	/* Init idr used for memory handle translation */
	idr_init(&pdd->alloc_idr);

	return pdd;
}

/**
 * kfd_process_device_init_vm - Initialize a VM for a process-device
 *
 * @pdd: The process-device
 * @drm_file: Optional pointer to a DRM file descriptor
 *
 * If @drm_file is specified, it will be used to acquire the VM from
 * that file descriptor. If successful, the @pdd takes ownership of
 * the file descriptor.
 *
 * If @drm_file is NULL, a new VM is created.
 *
 * Returns 0 on success, -errno on failure.
 */
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
			       struct file *drm_file)
{
	struct kfd_process *p;
	struct kfd_dev *dev;
	int ret;

	if (pdd->vm)
		return drm_file ? -EBUSY : 0;

	p = pdd->process;
	dev = pdd->dev;

	if (drm_file)
		ret = dev->kfd2kgd->acquire_process_vm(
			dev->kgd, drm_file, p->pasid,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	else
		ret = dev->kfd2kgd->create_process_vm(
			dev->kgd, p->pasid, &pdd->vm, &p->kgd_process_info, &p->ef);
	if (ret) {
		pr_err("Failed to create process VM object\n");
		return ret;
	}

	ret = kfd_process_device_reserve_ib_mem(pdd);
	if (ret)
		goto err_reserve_ib_mem;
	ret = kfd_process_device_init_cwsr_dgpu(pdd);
	if (ret)
		goto err_init_cwsr;

	pdd->drm_file = drm_file;

	return 0;

err_init_cwsr:
err_reserve_ib_mem:
	kfd_process_device_free_bos(pdd);
	if (!drm_file)
		dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);
	pdd->vm = NULL;

	return ret;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int err;

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

	err = kfd_iommu_bind_process_to_device(pdd);
	if (err)
		return ERR_PTR(err);

	err = kfd_process_device_init_vm(pdd, NULL);
	if (err)
		return ERR_PTR(err);

	return pdd;
}

struct kfd_process_device *kfd_get_first_process_device_data(
						struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;
	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !(list_empty(&p->per_device_data));
}

/* Create specific handle mapped to mem from process local memory idr
 * Assumes that the process lock is held.
 */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
					void *mem)
{
	return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
}

/* Translate specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
					int handle)
{
	if (handle < 0)
		return NULL;

	return idr_find(&pdd->alloc_idr, handle);
}

/* Remove specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
					int handle)
{
	if (handle >= 0)
		idr_remove(&pdd->alloc_idr, handle);
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
	struct kfd_process *p, *ret_p = NULL;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (p->pasid == pasid) {
			kref_get(&p->ref);
			ret_p = p;
			break;
		}
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return ret_p;
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *p;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	p = find_process_by_mm(mm);
	if (p)
		kref_get(&p->ref);

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

/* kfd_process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
int kfd_process_evict_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r = 0;
	unsigned int n_evicted = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
							    &pdd->qpd);
		if (r) {
			pr_err("Failed to evict process queues\n");
			goto fail;
		}
		n_evicted++;
	}

	return r;

fail:
	/* To keep state consistent, roll back partial eviction by
	 * restoring queues
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		if (n_evicted == 0)
			break;
		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd))
			pr_err("Failed to restore queues\n");

		n_evicted--;
	}

	return r;
}

/* kfd_process_restore_queues - Restore all user queues of a process */
int kfd_process_restore_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r, ret = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd);
		if (r) {
			pr_err("Failed to restore process queues\n");
			if (!ret)
				ret = r;
		}
	}

	return ret;
}

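/* Delayed work that evicts all queues of a process, signals its
 * eviction fence and schedules the restore work.
 */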
static void evict_process_worker(struct work_struct *work)
{
	int ret;
	struct kfd_process *p;
	struct delayed_work *dwork;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid
	 */
	p = container_of(dwork, struct kfd_process, eviction_work);
	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
		  "Eviction fence mismatch\n");

	/* A narrow window of overlap between the restore and evict work
	 * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
	 * unreserves the KFD BOs, the process can be evicted again before
	 * the restore has finished its last few steps. So wait for any
	 * previous restore work to complete.
	 */
	flush_delayed_work(&p->restore_work);

	pr_debug("Started evicting pasid %d\n", p->pasid);
	ret = kfd_process_evict_queues(p);
	if (!ret) {
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
		queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));

		pr_debug("Finished evicting pasid %d\n", p->pasid);
	} else
		pr_err("Failed to evict queues of pasid %d\n", p->pasid);
}

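/* Delayed work that restores the BOs of an evicted process through KGD
 * and then restores its queues; reschedules itself if the restore fails.
 */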
static void restore_process_worker(struct work_struct *work)
{
	struct delayed_work *dwork;
	struct kfd_process *p;
	struct kfd_process_device *pdd;
	int ret = 0;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid
	 */
	p = container_of(dwork, struct kfd_process, restore_work);

	/* Call restore_process_bos on the first KGD device. This function
	 * takes care of restoring the whole process including other devices.
	 * Restore can fail if not enough memory is available. If so,
	 * reschedule another attempt.
	 */
	pdd = list_first_entry(&p->per_device_data,
			       struct kfd_process_device,
			       per_device_list);

	pr_debug("Started restoring pasid %d\n", p->pasid);

	/* Set last_restore_timestamp before the restoration has actually
	 * succeeded. Otherwise it would have to be set by KGD
	 * (restore_process_bos) before the KFD BOs are unreserved; if not,
	 * the process could be evicted again before the timestamp is set.
	 * If the restore fails, the timestamp will be set again on the next
	 * attempt. This means the minimum GPU quantum is
	 * PROCESS_ACTIVE_TIME_MS minus the time it takes to execute the
	 * following two functions.
	 */

	p->last_restore_timestamp = get_jiffies_64();
	ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info,
						     &p->ef);
	if (ret) {
		pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
		WARN(!ret, "reschedule restore work failed\n");
		return;
	}

	ret = kfd_process_restore_queues(p);
	if (!ret)
		pr_debug("Finished restoring pasid %d\n", p->pasid);
	else
		pr_err("Failed to restore queues of pasid %d\n", p->pasid);
}

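/* kfd_suspend_all_processes - Evict the queues of every known process,
 * then signal and drop their eviction fences.
 */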
void kfd_suspend_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		cancel_delayed_work_sync(&p->eviction_work);
		cancel_delayed_work_sync(&p->restore_work);

		if (kfd_process_evict_queues(p))
			pr_err("Failed to suspend process %d\n", p->pasid);
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
}

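/* kfd_resume_all_processes - Queue restore work for every known process.
 * Returns -EFAULT if any restore work could not be queued.
 */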
int kfd_resume_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
			pr_err("Restore process %d failed during resume\n",
			       p->pasid);
			ret = -EFAULT;
		}
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
	return ret;
}

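/* kfd_reserved_mem_mmap - Back the reserved (CWSR) mapping of a process
 * with freshly allocated zeroed pages and map them into the user VMA.
 */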
int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
			  struct vm_area_struct *vma)
{
	struct kfd_process_device *pdd;
	struct qcm_process_device *qpd;

	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
		pr_err("Incorrect CWSR mapping size.\n");
		return -EINVAL;
	}

	pdd = kfd_get_process_device_data(dev, process);
	if (!pdd)
		return -EINVAL;
	qpd = &pdd->qpd;

	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					get_order(KFD_CWSR_TBA_TMA_SIZE));
	if (!qpd->cwsr_kaddr) {
		pr_err("Error allocating per process CWSR buffer.\n");
		return -ENOMEM;
	}

	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
	/* Mapping pages to user process */
	return remap_pfn_range(vma, vma->vm_start,
			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}

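/* kfd_flush_tlb - Invalidate the GPU TLBs for a process-device: by VMID
 * under the no-HWS scheduling policy, otherwise by PASID.
 */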
void kfd_flush_tlb(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;
	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		/* Nothing to flush until a VMID is assigned, which
		 * only happens when the first queue is created.
		 */
		if (pdd->qpd.vmid)
			f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
	} else {
		f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
	}
}

#if defined(CONFIG_DEBUG_FS)

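/* Dump the MQDs of all queues of all processes into a debugfs seq_file */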
int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
{
	struct kfd_process *p;
	unsigned int temp;
	int r = 0;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		seq_printf(m, "Process %d PASID %d:\n",
			   p->lead_thread->tgid, p->pasid);

		mutex_lock(&p->mutex);
		r = pqm_debugfs_mqds(m, &p->pqm);
		mutex_unlock(&p->mutex);

		if (r)
			break;
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return r;
}

#endif