/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>
#include <linux/file.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu.h"

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"

/*
 * List of struct kfd_process (field kfd_processes).
 * Unique/indexed by mm_struct*
 */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_SRCU(kfd_processes_srcu);

/* For process termination handling */
static struct workqueue_struct *kfd_process_wq;

/* Ordered, single-threaded workqueue for restoring evicted
 * processes. Restoring multiple processes concurrently under memory
 * pressure can lead to processes blocking each other from validating
 * their BOs and result in a live-lock situation where processes
 * remain evicted indefinitely.
 */
static struct workqueue_struct *kfd_restore_wq;

static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread);
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);

static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);

struct kfd_procfs_tree {
	struct kobject *kobj;
};

static struct kfd_procfs_tree procfs;

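/* Sysfs show callback for the per-process attributes exposed under the
 * kfd "proc" directory. Only the "pasid" attribute is implemented so far.
 */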
static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
			       char *buffer)
{
	int val = 0;

	if (strcmp(attr->name, "pasid") == 0) {
		struct kfd_process *p = container_of(attr, struct kfd_process,
						     attr_pasid);
		val = p->pasid;
	} else {
		pr_err("Invalid attribute");
		return -EINVAL;
	}

	return snprintf(buffer, PAGE_SIZE, "%d\n", val);
}

static void kfd_procfs_kobj_release(struct kobject *kobj)
{
	kfree(kobj);
}

static const struct sysfs_ops kfd_procfs_ops = {
	.show = kfd_procfs_show,
};

static struct kobj_type procfs_type = {
	.release = kfd_procfs_kobj_release,
	.sysfs_ops = &kfd_procfs_ops,
};

void kfd_procfs_init(void)
{
	int ret = 0;

	procfs.kobj = kfd_alloc_struct(procfs.kobj);
	if (!procfs.kobj)
		return;

	ret = kobject_init_and_add(procfs.kobj, &procfs_type,
				   &kfd_device->kobj, "proc");
	if (ret) {
		pr_warn("Could not create procfs proc folder");
		/* If we fail to create the procfs, clean up */
		kfd_procfs_shutdown();
	}
}

void kfd_procfs_shutdown(void)
{
	if (procfs.kobj) {
		kobject_del(procfs.kobj);
		kobject_put(procfs.kobj);
		procfs.kobj = NULL;
	}
}

int kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
	if (!kfd_restore_wq)
		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);

	if (!kfd_process_wq || !kfd_restore_wq) {
		kfd_process_destroy_wq();
		return -ENOMEM;
	}

	return 0;
}

void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
	if (kfd_restore_wq) {
		destroy_workqueue(kfd_restore_wq);
		kfd_restore_wq = NULL;
	}
}

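/* kfd_process_free_gpuvm - Unmap a buffer from the process's GPU VM and
 *	release the underlying memory object.
 */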
static void kfd_process_free_gpuvm(struct kgd_mem *mem,
			struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem);
}

/* kfd_process_alloc_gpuvm - Allocate and map a buffer in the process's GPU VM
 *	This function should only be called right after the process
 *	is created and while kfd_processes_mutex is still being held
 *	to avoid concurrency. Because of that exclusiveness, we do
 *	not need to take p->mutex.
 */
static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
				   uint64_t gpu_va, uint32_t size,
				   uint32_t flags, void **kptr)
{
	struct kfd_dev *kdev = pdd->dev;
	struct kgd_mem *mem = NULL;
	int handle;
	int err;

	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
						 pdd->vm, &mem, NULL, flags);
	if (err)
		goto err_alloc_mem;

	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
	if (err)
		goto err_map_mem;

	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Create an obj handle so kfd_process_device_remove_obj_handle
	 * will take care of the bo removal when the process finishes.
	 * We do not need to take p->mutex, because the process is just
	 * created and the ioctls have not had the chance to run.
	 */
	handle = kfd_process_device_create_obj_handle(pdd, mem);

	if (handle < 0) {
		err = handle;
		goto free_gpuvm;
	}

	if (kptr) {
		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
				(struct kgd_mem *)mem, kptr, NULL);
		if (err) {
			pr_debug("Map GTT BO to kernel failed\n");
			goto free_obj_handle;
		}
	}

	return err;

free_obj_handle:
	kfd_process_device_remove_obj_handle(pdd, handle);
free_gpuvm:
sync_memory_failed:
	kfd_process_free_gpuvm(mem, pdd);
	return err;

err_map_mem:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem);
err_alloc_mem:
	if (kptr)
		*kptr = NULL;
	return err;
}

/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
 *	process for IB usage. The memory reserved is for KFD to submit
 *	IBs to AMDGPU from kernel. If the memory is reserved
 *	successfully, ib_kaddr will have the CPU/kernel
 *	address. Check ib_kaddr before accessing the memory.
 */
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
			 ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
			 ALLOC_MEM_FLAGS_WRITABLE |
			 ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (qpd->ib_kaddr || !qpd->ib_base)
		return 0;

	/* ib_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
				      &kaddr);
	if (ret)
		return ret;

	qpd->ib_kaddr = kaddr;

	return 0;
}

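/* kfd_create_process - Find or create the kfd_process for the current
 *	task, typically on the /dev/kfd open path. Takes a reference that
 *	the caller must drop with kfd_unref_process(). The file pointer is
 *	used to map the CWSR trap handler on APUs.
 */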
struct kfd_process *kfd_create_process(struct file *filep)
{
	struct kfd_process *process;
	struct task_struct *thread = current;
	int ret;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/*
	 * Take the kfd_processes_mutex before starting process creation
	 * so there won't be a case where two threads of the same process
	 * create two kfd_process structures.
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process) {
		pr_debug("Process already found\n");
	} else {
		process = create_process(thread);
		if (IS_ERR(process))
			goto out;

		ret = kfd_process_init_cwsr_apu(process, filep);
		if (ret) {
			process = ERR_PTR(ret);
			goto out;
		}

		if (!procfs.kobj)
			goto out;

		process->kobj = kfd_alloc_struct(process->kobj);
		if (!process->kobj) {
			pr_warn("Creating procfs kobject failed");
			goto out;
		}
		ret = kobject_init_and_add(process->kobj, &procfs_type,
					   procfs.kobj, "%d",
					   (int)process->lead_thread->pid);
		if (ret) {
			pr_warn("Creating procfs pid directory failed");
			goto out;
		}

		process->attr_pasid.name = "pasid";
		process->attr_pasid.mode = KFD_SYSFS_FILE_MODE;
		sysfs_attr_init(&process->attr_pasid);
		ret = sysfs_create_file(process->kobj, &process->attr_pasid);
		if (ret)
			pr_warn("Creating pasid for pid %d failed",
					(int)process->lead_thread->pid);
	}
out:
	if (!IS_ERR(process))
		kref_get(&process->ref);
	mutex_unlock(&kfd_processes_mutex);

	return process;
}

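/* kfd_get_process - Look up the kfd_process of @thread without taking a
 *	reference. Returns an ERR_PTR on failure, e.g. if the process has
 *	not been created yet.
 */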
struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);
	if (!process)
		return ERR_PTR(-EINVAL);

	return process;
}

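/* Look up a kfd_process by its mm_struct in the process hash table. The
 * caller must protect the lookup, e.g. by holding the kfd_processes_srcu
 * read lock.
 */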
static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
					kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

void kfd_unref_process(struct kfd_process *p)
{
	kref_put(&p->ref, kfd_process_ref_release);
}

static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
	struct kfd_process *p = pdd->process;
	void *mem;
	int id;

	/*
	 * Remove all handles from idr and release appropriate
	 * local memory object
	 */
	idr_for_each_entry(&pdd->alloc_idr, mem, id) {
		struct kfd_process_device *peer_pdd;

		list_for_each_entry(peer_pdd, &p->per_device_data,
				    per_device_list) {
			if (!peer_pdd->vm)
				continue;
			amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
				peer_pdd->dev->kgd, mem, peer_pdd->vm);
		}

		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem);
		kfd_process_device_remove_obj_handle(pdd, id);
	}
}

static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		kfd_process_device_free_bos(pdd);
}

static void kfd_process_destroy_pdds(struct kfd_process *p)
{
	struct kfd_process_device *pdd, *temp;

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
				 per_device_list) {
		pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
				pdd->dev->id, p->pasid);

		if (pdd->drm_file) {
			amdgpu_amdkfd_gpuvm_release_process_vm(
					pdd->dev->kgd, pdd->vm);
			fput(pdd->drm_file);
		} else if (pdd->vm) {
			amdgpu_amdkfd_gpuvm_destroy_process_vm(
				pdd->dev->kgd, pdd->vm);
		}

		list_del(&pdd->per_device_list);

		if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
				get_order(KFD_CWSR_TBA_TMA_SIZE));

		kfree(pdd->qpd.doorbell_bitmap);
		idr_destroy(&pdd->alloc_idr);

		kfree(pdd);
	}
}

/* No process locking is needed in this function, because the process
 * is not findable any more. We must assume that no other thread is
 * using it any more, otherwise we couldn't safely free the process
 * structure in the end.
 */
static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process *p = container_of(work, struct kfd_process,
					     release_work);

	/* Remove the procfs files */
	if (p->kobj) {
		sysfs_remove_file(p->kobj, &p->attr_pasid);
		kobject_del(p->kobj);
		kobject_put(p->kobj);
		p->kobj = NULL;
	}

	kfd_iommu_unbind_process(p);

	kfd_process_free_outstanding_kfd_bos(p);

	kfd_process_destroy_pdds(p);
	dma_fence_put(p->ef);

	kfd_event_free_process(p);

	kfd_pasid_free(p->pasid);
	kfd_free_process_doorbells(p);

	mutex_destroy(&p->mutex);

	put_task_struct(p->lead_thread);

	kfree(p);
}

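/* kref release callback. The actual teardown is deferred to
 * kfd_process_wq_release, which runs on the kfd_process_wq workqueue.
 */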
static void kfd_process_ref_release(struct kref *ref)
{
	struct kfd_process *p = container_of(ref, struct kfd_process, ref);

	INIT_WORK(&p->release_work, kfd_process_wq_release);
	queue_work(kfd_process_wq, &p->release_work);
}

static void kfd_process_free_notifier(struct mmu_notifier *mn)
{
	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
}

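/* MMU notifier release callback, called when the process's address space
 * is torn down (e.g. on process exit). Removes the process from the hash
 * table, unregisters any debugger, destroys all queues and marks the mm
 * as no longer valid.
 */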
static void kfd_process_notifier_release(struct mmu_notifier *mn,
					struct mm_struct *mm)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;

	/*
	 * The kfd_process structure cannot be freed because the
	 * mmu_notifier srcu is read locked
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	if (WARN_ON(p->mm != mm))
		return;

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	cancel_delayed_work_sync(&p->eviction_work);
	cancel_delayed_work_sync(&p->restore_work);

	mutex_lock(&p->mutex);

	/* Iterate over all process device data structures. If a pdd is
	 * in debug mode, force debugger unregistration first so that
	 * the queues can be destroyed afterwards.
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;

		mutex_lock(kfd_get_dbgmgr_mutex());
		if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
			if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
				kfd_dbgmgr_destroy(dev->dbgmgr);
				dev->dbgmgr = NULL;
			}
		}
		mutex_unlock(kfd_get_dbgmgr_mutex());
	}

	kfd_process_dequeue_from_all_devices(p);
	pqm_uninit(&p->pqm);

	/* Indicate to other users that MM is no longer valid */
	p->mm = NULL;

	mutex_unlock(&p->mutex);

	mmu_notifier_put(&p->mmu_notifier);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
	.free_notifier = kfd_process_free_notifier,
};

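/* Set up the CWSR (compute wave save/restore) trap handler for APUs: for
 * each CWSR-capable device without a dGPU cwsr_base, map the reserved
 * memory region through the /dev/kfd file and copy the trap handler ISA
 * into it.
 */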
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
	unsigned long offset;
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;
		struct qcm_process_device *qpd = &pdd->qpd;

		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
			continue;

		offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
			MAP_SHARED, offset);

		if (IS_ERR_VALUE(qpd->tba_addr)) {
			int err = qpd->tba_addr;

			pr_err("Failure to set tba address. error %d.\n", err);
			qpd->tba_addr = 0;
			qpd->cwsr_kaddr = NULL;
			return err;
		}

		memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
		pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
			qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
	}

	return 0;
}

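/* Set up the CWSR trap handler for dGPUs: allocate a GTT buffer at the
 * per-process cwsr_base address in the GPU VM, map it into the kernel
 * and copy the trap handler ISA into it.
 */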
static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
		ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
		return 0;

	/* cwsr_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
				      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
	if (ret)
		return ret;

	qpd->cwsr_kaddr = kaddr;
	qpd->tba_addr = qpd->cwsr_base;

	memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
	pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
		 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);

	return 0;
}

/*
 * On return the kfd_process is fully operational and will be freed when the
 * mm is released
 */
static struct kfd_process *create_process(const struct task_struct *thread)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);
	if (!process)
		goto err_alloc_process;

	kref_init(&process->ref);
	mutex_init(&process->mutex);
	process->mm = thread->mm;
	process->lead_thread = thread->group_leader;
	INIT_LIST_HEAD(&process->per_device_data);
	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
	process->last_restore_timestamp = get_jiffies_64();
	kfd_event_init_process(process);
	process->is_32bit_user_mode = in_compat_syscall();

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	if (kfd_alloc_process_doorbells(process) < 0)
		goto err_alloc_doorbells;

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	/* init process apertures */
	err = kfd_init_apertures(process);
	if (err != 0)
		goto err_init_apertures;

	/* Must be last, have to use release destruction after this */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_register_notifier;

	get_task_struct(process->lead_thread);
	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
			(uintptr_t)process->mm);

	return process;

err_register_notifier:
	kfd_process_free_outstanding_kfd_bos(process);
	kfd_process_destroy_pdds(process);
err_init_apertures:
	pqm_uninit(&process->pqm);
err_process_pqm_init:
	kfd_free_process_doorbells(process);
err_alloc_doorbells:
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	mutex_destroy(&process->mutex);
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}

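/* Mark the doorbells reserved for SDMA, IH and VCN on SOC15 ASICs in the
 * per-process doorbell bitmap so they are not handed out to user queues.
 */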
static int init_doorbell_bitmap(struct qcm_process_device *qpd,
			struct kfd_dev *dev)
{
	unsigned int i;
	int range_start = dev->shared_resources.non_cp_doorbells_start;
	int range_end = dev->shared_resources.non_cp_doorbells_end;

	if (!KFD_IS_SOC15(dev->device_info->asic_family))
		return 0;

	qpd->doorbell_bitmap =
		kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
				     BITS_PER_BYTE), GFP_KERNEL);
	if (!qpd->doorbell_bitmap)
		return -ENOMEM;

	/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
			range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
			range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);

	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
		if (i >= range_start && i <= range_end) {
			set_bit(i, qpd->doorbell_bitmap);
			set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
				qpd->doorbell_bitmap);
		}
	}

	return 0;
}

struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	return NULL;
}

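/* Allocate and initialize the per-device data (pdd) for process @p on
 * device @dev and add it to the process's per-device list. Returns NULL
 * on allocation failure.
 */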
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (!pdd)
		return NULL;

	if (init_doorbell_bitmap(&pdd->qpd, dev)) {
		pr_err("Failed to init doorbell for process\n");
		kfree(pdd);
		return NULL;
	}

	pdd->dev = dev;
	INIT_LIST_HEAD(&pdd->qpd.queues_list);
	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
	pdd->qpd.dqm = dev->dqm;
	pdd->qpd.pqm = &p->pqm;
	pdd->qpd.evicted = 0;
	pdd->process = p;
	pdd->bound = PDD_UNBOUND;
	pdd->already_dequeued = false;
	list_add(&pdd->per_device_list, &p->per_device_data);

	/* Init idr used for memory handle translation */
	idr_init(&pdd->alloc_idr);

	return pdd;
}

/**
 * kfd_process_device_init_vm - Initialize a VM for a process-device
 *
 * @pdd: The process-device
 * @drm_file: Optional pointer to a DRM file descriptor
 *
 * If @drm_file is specified, it will be used to acquire the VM from
 * that file descriptor. If successful, the @pdd takes ownership of
 * the file descriptor.
 *
 * If @drm_file is NULL, a new VM is created.
 *
 * Returns 0 on success, -errno on failure.
 */
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
			       struct file *drm_file)
{
	struct kfd_process *p;
	struct kfd_dev *dev;
	int ret;

	if (pdd->vm)
		return drm_file ? -EBUSY : 0;

	p = pdd->process;
	dev = pdd->dev;

	if (drm_file)
		ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
			dev->kgd, drm_file, p->pasid,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	else
		ret = amdgpu_amdkfd_gpuvm_create_process_vm(dev->kgd, p->pasid,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	if (ret) {
		pr_err("Failed to create process VM object\n");
		return ret;
	}

	amdgpu_vm_set_task_info(pdd->vm);

	ret = kfd_process_device_reserve_ib_mem(pdd);
	if (ret)
		goto err_reserve_ib_mem;
	ret = kfd_process_device_init_cwsr_dgpu(pdd);
	if (ret)
		goto err_init_cwsr;

	pdd->drm_file = drm_file;

	return 0;

err_init_cwsr:
err_reserve_ib_mem:
	kfd_process_device_free_bos(pdd);
	if (!drm_file)
		amdgpu_amdkfd_gpuvm_destroy_process_vm(dev->kgd, pdd->vm);
	pdd->vm = NULL;

	return ret;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int err;

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

	err = kfd_iommu_bind_process_to_device(pdd);
	if (err)
		return ERR_PTR(err);

	err = kfd_process_device_init_vm(pdd, NULL);
	if (err)
		return ERR_PTR(err);

	return pdd;
}

struct kfd_process_device *kfd_get_first_process_device_data(
						struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;
	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !(list_empty(&p->per_device_data));
}

/* Create specific handle mapped to mem from process local memory idr
 * Assumes that the process lock is held.
 */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
					void *mem)
{
	return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
}

/* Translate specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
					int handle)
{
	if (handle < 0)
		return NULL;

	return idr_find(&pdd->alloc_idr, handle);
}

/* Remove specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
					int handle)
{
	if (handle >= 0)
		idr_remove(&pdd->alloc_idr, handle);
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
	struct kfd_process *p, *ret_p = NULL;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (p->pasid == pasid) {
			kref_get(&p->ref);
			ret_p = p;
			break;
		}
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return ret_p;
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *p;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	p = find_process_by_mm(mm);
	if (p)
		kref_get(&p->ref);

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

/* kfd_process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
int kfd_process_evict_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r = 0;
	unsigned int n_evicted = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
							    &pdd->qpd);
		if (r) {
			pr_err("Failed to evict process queues\n");
			goto fail;
		}
		n_evicted++;
	}

	return r;

fail:
	/* To keep state consistent, roll back partial eviction by
	 * restoring queues
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		if (n_evicted == 0)
			break;
		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd))
			pr_err("Failed to restore queues\n");

		n_evicted--;
	}

	return r;
}

/* kfd_process_restore_queues - Restore all user queues of a process */
int kfd_process_restore_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r, ret = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd);
		if (r) {
			pr_err("Failed to restore process queues\n");
			if (!ret)
				ret = r;
		}
	}

	return ret;
}

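/* Delayed work that evicts all user queues of a process. On success it
 * signals the eviction fence and schedules restore_process_worker after
 * PROCESS_RESTORE_TIME_MS.
 */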
static void evict_process_worker(struct work_struct *work)
{
	int ret;
	struct kfd_process *p;
	struct delayed_work *dwork;

	dwork = to_delayed_work(work);

	/* Process termination cancels this work item before freeing the
	 * process, so kfd_process p stays valid while this work runs.
	 */
	p = container_of(dwork, struct kfd_process, eviction_work);
	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
		  "Eviction fence mismatch\n");

	/* A narrow window of overlap between restore and evict work
	 * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
	 * unreserves the KFD BOs, the process can be evicted again, but
	 * restore still has a few more steps to finish. So wait for any
	 * previous restore work to complete.
	 */
	flush_delayed_work(&p->restore_work);

	pr_debug("Started evicting pasid 0x%x\n", p->pasid);
	ret = kfd_process_evict_queues(p);
	if (!ret) {
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
		queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));

		pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
	} else {
		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
	}
}

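/* Delayed work that revalidates the process's BOs and restores its user
 * queues after an eviction. If BO restore fails, the work reschedules
 * itself after PROCESS_BACK_OFF_TIME_MS.
 */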
static void restore_process_worker(struct work_struct *work)
{
	struct delayed_work *dwork;
	struct kfd_process *p;
	int ret = 0;

	dwork = to_delayed_work(work);

	/* Process termination cancels this work item before freeing the
	 * process, so kfd_process p stays valid while this work runs.
	 */
	p = container_of(dwork, struct kfd_process, restore_work);
	pr_debug("Started restoring pasid 0x%x\n", p->pasid);

	/* Set last_restore_timestamp before attempting the restore.
	 * Otherwise it would have to be set by KGD (restore_process_bos)
	 * before the KFD BOs are unreserved. If not, the process could be
	 * evicted again before the timestamp is set.
	 * If the restore fails, the timestamp is set again on the next
	 * attempt. This means the minimum GPU quantum would be
	 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
	 * functions).
	 */

	p->last_restore_timestamp = get_jiffies_64();
	ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
						     &p->ef);
	if (ret) {
		pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
		WARN(!ret, "reschedule restore work failed\n");
		return;
	}

	ret = kfd_process_restore_queues(p);
	if (!ret)
		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
	else
		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
}

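/* Evict the queues of every known process and signal their eviction
 * fences, e.g. when the device is about to be suspended or reset.
 */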
void kfd_suspend_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		cancel_delayed_work_sync(&p->eviction_work);
		cancel_delayed_work_sync(&p->restore_work);

		if (kfd_process_evict_queues(p))
			pr_err("Failed to suspend process 0x%x\n", p->pasid);
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
}

int kfd_resume_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
			pr_err("Restore process %d failed during resume\n",
			       p->pasid);
			ret = -EFAULT;
		}
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
	return ret;
}

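/* Back the CWSR reserved-memory mapping for APUs: allocate a zeroed,
 * page-aligned kernel buffer for the CWSR trap handler (TBA/TMA) region
 * and remap it into the user VMA.
 */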
int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
			  struct vm_area_struct *vma)
{
	struct kfd_process_device *pdd;
	struct qcm_process_device *qpd;

	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
		pr_err("Incorrect CWSR mapping size.\n");
		return -EINVAL;
	}

	pdd = kfd_get_process_device_data(dev, process);
	if (!pdd)
		return -EINVAL;
	qpd = &pdd->qpd;

	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					get_order(KFD_CWSR_TBA_TMA_SIZE));
	if (!qpd->cwsr_kaddr) {
		pr_err("Error allocating per process CWSR buffer.\n");
		return -ENOMEM;
	}

	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
	/* Mapping pages to user process */
	return remap_pfn_range(vma, vma->vm_start,
			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}

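/* Flush the GPU TLB entries of a process-device: by VMID when running
 * without HWS, otherwise by PASID.
 */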
void kfd_flush_tlb(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		/* Nothing to flush until a VMID is assigned, which
		 * only happens when the first queue is created.
		 */
		if (pdd->qpd.vmid)
			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
							pdd->qpd.vmid);
	} else {
		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
						pdd->process->pasid);
	}
}

#if defined(CONFIG_DEBUG_FS)

int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
{
	struct kfd_process *p;
	unsigned int temp;
	int r = 0;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		seq_printf(m, "Process %d PASID 0x%x:\n",
			   p->lead_thread->tgid, p->pasid);

		mutex_lock(&p->mutex);
		r = pqm_debugfs_mqds(m, &p->pqm);
		mutex_unlock(&p->mutex);

		if (r)
			break;
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return r;
}

#endif