/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>
#include <linux/file.h>

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"

/*
 * Hash table of struct kfd_process (hashed by the kfd_processes field).
 * Unique/indexed by mm_struct*
 */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_SRCU(kfd_processes_srcu);

/* For process termination handling */
static struct workqueue_struct *kfd_process_wq;

/* Ordered, single-threaded workqueue for restoring evicted
 * processes. Restoring multiple processes concurrently under memory
 * pressure can lead to processes blocking each other from validating
 * their BOs and result in a live-lock situation where processes
 * remain evicted indefinitely.
 */
static struct workqueue_struct *kfd_restore_wq;

static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread,
					struct file *filep);

static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);


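/* kfd_process_create_wq - Allocate the process-termination and restore
 * workqueues. Safe to call more than once; already-allocated workqueues
 * are reused.
 */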
int kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
	if (!kfd_restore_wq)
		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);

	if (!kfd_process_wq || !kfd_restore_wq) {
		kfd_process_destroy_wq();
		return -ENOMEM;
	}

	return 0;
}

void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
	if (kfd_restore_wq) {
		destroy_workqueue(kfd_restore_wq);
		kfd_restore_wq = NULL;
	}
}

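/* kfd_process_free_gpuvm - Unmap a buffer object from the process GPU VM
 * and free it through the KGD interface.
 */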
static void kfd_process_free_gpuvm(struct kgd_mem *mem,
			struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd, mem, pdd->vm);
	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, mem);
}

/* kfd_process_alloc_gpuvm - Allocate GPU VM memory for the KFD process
 *	This function should only be called right after the process
 *	is created and while kfd_processes_mutex is still held,
 *	to avoid concurrency. Because of that exclusiveness, we do
 *	not need to take p->mutex.
 */
static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
				   uint64_t gpu_va, uint32_t size,
				   uint32_t flags, void **kptr)
{
	struct kfd_dev *kdev = pdd->dev;
	struct kgd_mem *mem = NULL;
	int handle;
	int err;

	err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
						 pdd->vm, &mem, NULL, flags);
	if (err)
		goto err_alloc_mem;

	err = kdev->kfd2kgd->map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
	if (err)
		goto err_map_mem;

	err = kdev->kfd2kgd->sync_memory(kdev->kgd, mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Create an obj handle so kfd_process_device_remove_obj_handle
	 * will take care of the BO removal when the process finishes.
	 * We do not need to take p->mutex, because the process was just
	 * created and the ioctls have not had a chance to run yet.
	 */
	handle = kfd_process_device_create_obj_handle(pdd, mem);

	if (handle < 0) {
		err = handle;
		goto free_gpuvm;
	}

	if (kptr) {
		err = kdev->kfd2kgd->map_gtt_bo_to_kernel(kdev->kgd,
				(struct kgd_mem *)mem, kptr, NULL);
		if (err) {
			pr_debug("Map GTT BO to kernel failed\n");
			goto free_obj_handle;
		}
	}

	return err;

free_obj_handle:
	kfd_process_device_remove_obj_handle(pdd, handle);
free_gpuvm:
sync_memory_failed:
	kfd_process_free_gpuvm(mem, pdd);
	return err;

err_map_mem:
	kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem);
err_alloc_mem:
	/* kptr is optional (see above), so only clear it when provided */
	if (kptr)
		*kptr = NULL;
	return err;
}

/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
 *	process for IB usage. The memory reserved is for KFD to submit
 *	IBs to AMDGPU from kernel space. If the memory is reserved
 *	successfully, ib_kaddr will hold the CPU/kernel
 *	address. Check ib_kaddr before accessing the memory.
 */
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
			 ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
			 ALLOC_MEM_FLAGS_WRITABLE |
			 ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (qpd->ib_kaddr || !qpd->ib_base)
		return 0;

	/* ib_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
				      &kaddr);
	if (ret)
		return ret;

	qpd->ib_kaddr = kaddr;

	return 0;
}

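/* kfd_create_process - Find or create the kfd_process for the current task.
 * Only the pthreads threading model is supported: all threads must share
 * the group leader's mm. Serialized by kfd_processes_mutex so that two
 * threads of the same process cannot race to create two kfd_process
 * structures.
 */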
struct kfd_process *kfd_create_process(struct file *filep)
{
	struct kfd_process *process;
	struct task_struct *thread = current;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/*
	 * Take the kfd_processes_mutex before starting process creation
	 * so that two threads of the same process cannot race to create
	 * two kfd_process structures.
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process)
		pr_debug("Process already found\n");
	else
		process = create_process(thread, filep);

	mutex_unlock(&kfd_processes_mutex);

	return process;
}

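/* kfd_get_process - Look up the kfd_process for @thread without creating
 * it. Returns NULL if no process exists yet (see kfd_create_process).
 * Does not take a reference on the returned process.
 */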
struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);

	return process;
}

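/* find_process_by_mm - Look up a process by its mm_struct pointer.
 * The caller must be inside an SRCU read-side critical section
 * (kfd_processes_srcu).
 */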
static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
					kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

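/* kfd_unref_process - Drop a reference on a process. When the last
 * reference is dropped, the release work is queued on kfd_process_wq.
 */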
void kfd_unref_process(struct kfd_process *p)
{
	kref_put(&p->ref, kfd_process_ref_release);
}

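/* kfd_process_device_free_bos - Free all buffer objects allocated through
 * the per-device IDR. Each BO is first unmapped from every GPU VM of the
 * process that may map it, then freed on the owning device.
 */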
static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
	struct kfd_process *p = pdd->process;
	void *mem;
	int id;

	/*
	 * Remove all handles from the IDR and release the corresponding
	 * local memory objects
	 */
	idr_for_each_entry(&pdd->alloc_idr, mem, id) {
		struct kfd_process_device *peer_pdd;

		list_for_each_entry(peer_pdd, &p->per_device_data,
				    per_device_list) {
			if (!peer_pdd->vm)
				continue;
			peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu(
				peer_pdd->dev->kgd, mem, peer_pdd->vm);
		}

		pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd, mem);
		kfd_process_device_remove_obj_handle(pdd, id);
	}
}

static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		kfd_process_device_free_bos(pdd);
}

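/* kfd_process_destroy_pdds - Tear down all per-device data of a process:
 * release the per-device GPU VM (or the DRM file that owns it), free the
 * CWSR trap handler pages allocated when cwsr_base is not set, and destroy
 * the memory handle IDR.
 */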
static void kfd_process_destroy_pdds(struct kfd_process *p)
{
	struct kfd_process_device *pdd, *temp;

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
				 per_device_list) {
		pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
				pdd->dev->id, p->pasid);

		if (pdd->drm_file)
			fput(pdd->drm_file);
		else if (pdd->vm)
			pdd->dev->kfd2kgd->destroy_process_vm(
				pdd->dev->kgd, pdd->vm);

		list_del(&pdd->per_device_list);

		if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
				get_order(KFD_CWSR_TBA_TMA_SIZE));

		idr_destroy(&pdd->alloc_idr);

		kfree(pdd);
	}
}

/* No process locking is needed in this function, because the process
 * is no longer findable. We must assume that no other thread is
 * using it any more, otherwise we could not safely free the process
 * structure in the end.
 */
static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process *p = container_of(work, struct kfd_process,
					     release_work);

	kfd_iommu_unbind_process(p);

	kfd_process_free_outstanding_kfd_bos(p);

	kfd_process_destroy_pdds(p);
	dma_fence_put(p->ef);

	kfd_event_free_process(p);

	kfd_pasid_free(p->pasid);
	kfd_free_process_doorbells(p);

	mutex_destroy(&p->mutex);

	put_task_struct(p->lead_thread);

	kfree(p);
}

static void kfd_process_ref_release(struct kref *ref)
{
	struct kfd_process *p = container_of(ref, struct kfd_process, ref);

	INIT_WORK(&p->release_work, kfd_process_wq_release);
	queue_work(kfd_process_wq, &p->release_work);
}

static void kfd_process_destroy_delayed(struct rcu_head *rcu)
{
	struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);

	kfd_unref_process(p);
}

static void kfd_process_notifier_release(struct mmu_notifier *mn,
					struct mm_struct *mm)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;

	/*
	 * The kfd_process structure cannot be freed here because the
	 * mmu_notifier SRCU is read-locked
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	if (WARN_ON(p->mm != mm))
		return;

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	cancel_delayed_work_sync(&p->eviction_work);
	cancel_delayed_work_sync(&p->restore_work);

	mutex_lock(&p->mutex);

	/* Iterate over all process device data structures. If a pdd is in
	 * debug mode, force unregistration first so that the queues can be
	 * destroyed afterwards.
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;

		mutex_lock(kfd_get_dbgmgr_mutex());
		if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
			if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
				kfd_dbgmgr_destroy(dev->dbgmgr);
				dev->dbgmgr = NULL;
			}
		}
		mutex_unlock(kfd_get_dbgmgr_mutex());
	}

	kfd_process_dequeue_from_all_devices(p);
	pqm_uninit(&p->pqm);

	/* Indicate to other users that the MM is no longer valid */
	p->mm = NULL;

	mutex_unlock(&p->mutex);

	mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
	mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
};

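/* kfd_process_init_cwsr_apu - Set up the CWSR trap handler (TBA/TMA) for
 * devices that do not reserve CWSR memory in the GPU VM (cwsr_base unset).
 * The reserved-memory region is mapped into the user address space with
 * vm_mmap (backed by kfd_reserved_mem_mmap) and the trap handler ISA is
 * copied into the resulting kernel mapping.
 */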
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
	unsigned long offset;
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;
		struct qcm_process_device *qpd = &pdd->qpd;

		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
			continue;

		offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
			MAP_SHARED, offset);

		if (IS_ERR_VALUE(qpd->tba_addr)) {
			int err = qpd->tba_addr;

			pr_err("Failure to set tba address. error %d.\n", err);
			qpd->tba_addr = 0;
			qpd->cwsr_kaddr = NULL;
			return err;
		}

		memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
		pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
			qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
	}

	return 0;
}

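/* kfd_process_device_init_cwsr_dgpu - Set up the CWSR trap handler for
 * devices with a reserved CWSR aperture (cwsr_base set): allocate and map
 * a GTT BO in the process GPU VM at cwsr_base and copy the trap handler
 * ISA into its kernel mapping.
 */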
static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
		ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
		return 0;

	/* cwsr_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
				      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
	if (ret)
		return ret;

	qpd->cwsr_kaddr = kaddr;
	qpd->tba_addr = qpd->cwsr_base;

	memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
	pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
		 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);

	return 0;
}

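/* create_process - Allocate and initialize a new kfd_process for @thread.
 * The caller must hold kfd_processes_mutex. Registers an MMU notifier on
 * the process mm and adds the process to the global hash table.
 */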
static struct kfd_process *create_process(const struct task_struct *thread,
					struct file *filep)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);

	if (!process)
		goto err_alloc_process;

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	if (kfd_alloc_process_doorbells(process) < 0)
		goto err_alloc_doorbells;

	kref_init(&process->ref);

	mutex_init(&process->mutex);

	process->mm = thread->mm;

	/* Register the MMU notifier */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_mmu_notifier;

	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
			(uintptr_t)process->mm);

	process->lead_thread = thread->group_leader;
	get_task_struct(process->lead_thread);

	INIT_LIST_HEAD(&process->per_device_data);

	kfd_event_init_process(process);

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	/* Init process apertures */
	process->is_32bit_user_mode = in_compat_syscall();
	err = kfd_init_apertures(process);
	if (err != 0)
		goto err_init_apertures;

	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
	process->last_restore_timestamp = get_jiffies_64();

	err = kfd_process_init_cwsr_apu(process, filep);
	if (err)
		goto err_init_cwsr;

	return process;

err_init_cwsr:
	kfd_process_free_outstanding_kfd_bos(process);
	kfd_process_destroy_pdds(process);
err_init_apertures:
	pqm_uninit(&process->pqm);
err_process_pqm_init:
	hash_del_rcu(&process->kfd_processes);
	synchronize_rcu();
	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
err_mmu_notifier:
	mutex_destroy(&process->mutex);
	kfd_free_process_doorbells(process);
err_alloc_doorbells:
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}

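/* kfd_get_process_device_data - Return the per-device data (pdd) of
 * process @p for device @dev, or NULL if no pdd has been created for
 * that device yet.
 */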
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	return NULL;
}

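/* kfd_create_process_device_data - Allocate and initialize the per-device
 * data (pdd) for @dev and add it to the process's per-device list.
 * Returns NULL on allocation failure.
 */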
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (!pdd)
		return NULL;

	pdd->dev = dev;
	INIT_LIST_HEAD(&pdd->qpd.queues_list);
	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
	pdd->qpd.dqm = dev->dqm;
	pdd->qpd.pqm = &p->pqm;
	pdd->qpd.evicted = 0;
	pdd->process = p;
	pdd->bound = PDD_UNBOUND;
	pdd->already_dequeued = false;
	list_add(&pdd->per_device_list, &p->per_device_data);

	/* Init idr used for memory handle translation */
	idr_init(&pdd->alloc_idr);

	return pdd;
}

/**
 * kfd_process_device_init_vm - Initialize a VM for a process-device
 *
 * @pdd: The process-device
 * @drm_file: Optional pointer to a DRM file descriptor
 *
 * If @drm_file is specified, it will be used to acquire the VM from
 * that file descriptor. If successful, the @pdd takes ownership of
 * the file descriptor.
 *
 * If @drm_file is NULL, a new VM is created.
 *
 * Returns 0 on success, -errno on failure.
 */
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
			       struct file *drm_file)
{
	struct kfd_process *p;
	struct kfd_dev *dev;
	int ret;

	if (pdd->vm)
		return drm_file ? -EBUSY : 0;

	p = pdd->process;
	dev = pdd->dev;

	if (drm_file)
		ret = dev->kfd2kgd->acquire_process_vm(
			dev->kgd, drm_file,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	else
		ret = dev->kfd2kgd->create_process_vm(
			dev->kgd, &pdd->vm, &p->kgd_process_info, &p->ef);
	if (ret) {
		pr_err("Failed to create process VM object\n");
		return ret;
	}

	ret = kfd_process_device_reserve_ib_mem(pdd);
	if (ret)
		goto err_reserve_ib_mem;
	ret = kfd_process_device_init_cwsr_dgpu(pdd);
	if (ret)
		goto err_init_cwsr;

	pdd->drm_file = drm_file;

	return 0;

err_init_cwsr:
err_reserve_ib_mem:
	kfd_process_device_free_bos(pdd);
	if (!drm_file)
		dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);
	pdd->vm = NULL;

	return ret;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int err;

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

	err = kfd_iommu_bind_process_to_device(pdd);
	if (err)
		return ERR_PTR(err);

	err = kfd_process_device_init_vm(pdd, NULL);
	if (err)
		return ERR_PTR(err);

	return pdd;
}

struct kfd_process_device *kfd_get_first_process_device_data(
						struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;
	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !(list_empty(&p->per_device_data));
}

/* Create specific handle mapped to mem from process local memory idr
 * Assumes that the process lock is held.
 */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
					void *mem)
{
	return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
}

/* Translate specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
					int handle)
{
	if (handle < 0)
		return NULL;

	return idr_find(&pdd->alloc_idr, handle);
}

/* Remove specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
					int handle)
{
	if (handle >= 0)
		idr_remove(&pdd->alloc_idr, handle);
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
	struct kfd_process *p, *ret_p = NULL;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (p->pasid == pasid) {
			kref_get(&p->ref);
			ret_p = p;
			break;
		}
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return ret_p;
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *p;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	p = find_process_by_mm(mm);
	if (p)
		kref_get(&p->ref);

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

/* process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
static int process_evict_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r = 0;
	unsigned int n_evicted = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
							    &pdd->qpd);
		if (r) {
			pr_err("Failed to evict process queues\n");
			goto fail;
		}
		n_evicted++;
	}

	return r;

fail:
	/* To keep state consistent, roll back partial eviction by
	 * restoring queues
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		if (n_evicted == 0)
			break;
		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd))
			pr_err("Failed to restore queues\n");

		n_evicted--;
	}

	return r;
}

/* process_restore_queues - Restore all user queues of a process */
static int process_restore_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r, ret = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd);
		if (r) {
			pr_err("Failed to restore process queues\n");
			if (!ret)
				ret = r;
		}
	}

	return ret;
}

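/* evict_process_worker - Delayed work that evicts all user queues of a
 * process, signals the eviction fence, and schedules restore_process_worker
 * to run after PROCESS_RESTORE_TIME_MS.
 */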
static void evict_process_worker(struct work_struct *work)
{
	int ret;
	struct kfd_process *p;
	struct delayed_work *dwork;

	dwork = to_delayed_work(work);

	/* Process termination cancels this work item, so kfd_process p
	 * remains valid for the lifetime of this work item.
	 */
	p = container_of(dwork, struct kfd_process, eviction_work);
	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
		  "Eviction fence mismatch\n");

	/* A narrow window of overlap between the restore and evict work
	 * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
	 * unreserves KFD BOs, the process can be evicted again while
	 * restore still has a few more steps to finish. So wait for any
	 * previous restore work to complete first.
	 */
	flush_delayed_work(&p->restore_work);

	pr_debug("Started evicting pasid %d\n", p->pasid);
	ret = process_evict_queues(p);
	if (!ret) {
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
		queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));

		pr_debug("Finished evicting pasid %d\n", p->pasid);
	} else
		pr_err("Failed to evict queues of pasid %d\n", p->pasid);
}

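/* restore_process_worker - Delayed work that restores the process BOs via
 * the KGD interface and then restores the user queues. If restoring BOs
 * fails (e.g. for lack of memory), the work reschedules itself after
 * PROCESS_BACK_OFF_TIME_MS.
 */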
static void restore_process_worker(struct work_struct *work)
{
	struct delayed_work *dwork;
	struct kfd_process *p;
	struct kfd_process_device *pdd;
	int ret = 0;

	dwork = to_delayed_work(work);

	/* Process termination cancels this work item, so kfd_process p
	 * remains valid for the lifetime of this work item.
	 */
	p = container_of(dwork, struct kfd_process, restore_work);

	/* Call restore_process_bos on the first KGD device. This function
	 * takes care of restoring the whole process including other devices.
	 * Restore can fail if not enough memory is available. If so,
	 * reschedule again.
	 */
	pdd = list_first_entry(&p->per_device_data,
			       struct kfd_process_device,
			       per_device_list);

	pr_debug("Started restoring pasid %d\n", p->pasid);

	/* Set last_restore_timestamp before the restoration has actually
	 * succeeded. Otherwise it would have to be set by KGD
	 * (restore_process_bos) before KFD BOs are unreserved; if not, the
	 * process could be evicted again before the timestamp is set.
	 * If restore fails, the timestamp will be set again in the next
	 * attempt. This means the minimum GPU quantum is
	 * PROCESS_ACTIVE_TIME_MS minus the time it takes to execute the
	 * following two functions.
	 */

	p->last_restore_timestamp = get_jiffies_64();
	ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info,
						     &p->ef);
	if (ret) {
		pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
		WARN(!ret, "reschedule restore work failed\n");
		return;
	}

	ret = process_restore_queues(p);
	if (!ret)
		pr_debug("Finished restoring pasid %d\n", p->pasid);
	else
		pr_err("Failed to restore queues of pasid %d\n", p->pasid);
}

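/* kfd_suspend_all_processes - Evict the queues of every known process,
 * e.g. when the KFD devices are suspended. Pending eviction/restore work
 * is cancelled first, and the eviction fences are signalled so that
 * waiters are not left blocked.
 */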
void kfd_suspend_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		cancel_delayed_work_sync(&p->eviction_work);
		cancel_delayed_work_sync(&p->restore_work);

		if (process_evict_queues(p))
			pr_err("Failed to suspend process %d\n", p->pasid);
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
}

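/* kfd_resume_all_processes - Queue restore work for every known process.
 * Returns 0 on success, or -EFAULT if the restore work could not be
 * scheduled for at least one process.
 */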
int kfd_resume_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
			pr_err("Restore process %d failed during resume\n",
			       p->pasid);
			ret = -EFAULT;
		}
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
	return ret;
}

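/* kfd_reserved_mem_mmap - Back the reserved-memory mapping created by
 * kfd_process_init_cwsr_apu: allocate the per-process CWSR buffer and
 * remap it into the user address space of the process.
 */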
int kfd_reserved_mem_mmap(struct kfd_process *process,
			  struct vm_area_struct *vma)
{
	struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
	struct kfd_process_device *pdd;
	struct qcm_process_device *qpd;

	if (!dev)
		return -EINVAL;
	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
		pr_err("Incorrect CWSR mapping size.\n");
		return -EINVAL;
	}

	pdd = kfd_get_process_device_data(dev, process);
	if (!pdd)
		return -EINVAL;
	qpd = &pdd->qpd;

	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					get_order(KFD_CWSR_TBA_TMA_SIZE));
	if (!qpd->cwsr_kaddr) {
		pr_err("Error allocating per process CWSR buffer.\n");
		return -ENOMEM;
	}

	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
	/* Mapping pages to user process */
	return remap_pfn_range(vma, vma->vm_start,
			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}

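/* kfd_flush_tlb - Invalidate GPU TLBs for the process on the given device.
 * Without HWS the flush is done per VMID (and only once a VMID has been
 * assigned); with HWS it is done per PASID.
 */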
void kfd_flush_tlb(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;
	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		/* Nothing to flush until a VMID is assigned, which
		 * only happens when the first queue is created.
		 */
		if (pdd->qpd.vmid)
			f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
	} else {
		f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
	}
}

#if defined(CONFIG_DEBUG_FS)

int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
{
	struct kfd_process *p;
	unsigned int temp;
	int r = 0;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		seq_printf(m, "Process %d PASID %d:\n",
			   p->lead_thread->tgid, p->pasid);

		mutex_lock(&p->mutex);
		r = pqm_debugfs_mqds(m, &p->pqm);
		mutex_unlock(&p->mutex);

		if (r)
			break;
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return r;
}

#endif