/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_dbgmgr.h"

/*
 * Initial size for the array of queues.
 * The allocated size is doubled each time
 * it is exceeded up to MAX_PROCESS_QUEUES.
 */
#define INITIAL_QUEUE_ARRAY_SIZE 16

/*
 * List of struct kfd_process (field kfd_processes).
 * Unique/indexed by mm_struct*
 */
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_STATIC_SRCU(kfd_processes_srcu);

static struct workqueue_struct *kfd_process_wq;

struct kfd_process_release_work {
	struct work_struct kfd_work;
	struct kfd_process *p;
};

static struct kfd_process *find_process(const struct task_struct *thread);
static struct kfd_process *create_process(const struct task_struct *thread);

void kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
}

void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
}

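/*
 * Create a kfd_process for the calling thread's mm, or return the existing
 * one if a prior open of /dev/kfd already created it.
 */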
struct kfd_process *kfd_create_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	BUG_ON(!kfd_process_wq);

	if (thread->mm == NULL)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/* Take mmap_sem because we call __mmu_notifier_register inside */
	down_write(&thread->mm->mmap_sem);

	/*
	 * Take the kfd_processes mutex before starting process creation so
	 * that two threads of the same process cannot create two kfd_process
	 * structures.
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process)
		pr_debug("kfd: process already found\n");

	if (!process)
		process = create_process(thread);

	mutex_unlock(&kfd_processes_mutex);

	up_write(&thread->mm->mmap_sem);

	return process;
}

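/* Look up the kfd_process for a thread's mm without creating one. */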
struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (thread->mm == NULL)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);

	return process;
}

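/* Must be called inside a kfd_processes_srcu read-side section (or with
 * kfd_processes_mutex held) so the entry cannot disappear while inspected.
 */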
static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
					kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

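/*
 * Final teardown of a kfd_process, run from kfd_process_wq: resets
 * wavefronts if needed, unbinds each pasid from the IOMMU, and frees the
 * per-device data, events, pasid and the process structure itself.
 */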
static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process_release_work *my_work;
	struct kfd_process_device *pdd, *temp;
	struct kfd_process *p;

	my_work = (struct kfd_process_release_work *) work;

	p = my_work->p;

	pr_debug("Releasing process (pasid %d) in workqueue\n",
			p->pasid);

	mutex_lock(&p->mutex);

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
							per_device_list) {
		pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
				pdd->dev->id, p->pasid);

		if (pdd->reset_wavefronts)
			dbgdev_wave_reset_wavefronts(pdd->dev, p);

		amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
		list_del(&pdd->per_device_list);

		kfree(pdd);
	}

	kfd_event_free_process(p);

	kfd_pasid_free(p->pasid);

	mutex_unlock(&p->mutex);

	mutex_destroy(&p->mutex);

	kfree(p->queues);

	kfree(p);

	kfree(work);
}

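/*
 * Invoked via mmu_notifier_call_srcu() once the notifier SRCU grace period
 * has elapsed: drops the mm reference taken in the release notifier and
 * hands the sleeping part of the teardown off to kfd_process_wq.
 */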
static void kfd_process_destroy_delayed(struct rcu_head *rcu)
{
	struct kfd_process_release_work *work;
	struct kfd_process *p;

	BUG_ON(!kfd_process_wq);

	p = container_of(rcu, struct kfd_process, rcu);
	BUG_ON(atomic_read(&p->mm->mm_count) <= 0);

	mmdrop(p->mm);

	work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);

	if (work) {
		INIT_WORK((struct work_struct *) work, kfd_process_wq_release);
		work->p = p;
		queue_work(kfd_process_wq, (struct work_struct *) work);
	}
}

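/*
 * mmu_notifier release callback, invoked when the process address space is
 * torn down: removes the process from the hash table, uninitializes its
 * queues, and schedules the deferred free through SRCU.
 */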
static void kfd_process_notifier_release(struct mmu_notifier *mn,
					struct mm_struct *mm)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;

	/*
	 * The kfd_process structure cannot be freed because the
	 * mmu_notifier srcu is read locked
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	BUG_ON(p->mm != mm);

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	mutex_lock(&p->mutex);

	/* In case our notifier is called before the IOMMU notifier */
	pqm_uninit(&p->pqm);

	/* Iterate over all process device data structures and check
	 * whether we should delete debug managers and reset all wavefronts
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		if ((pdd->dev->dbgmgr) &&
				(pdd->dev->dbgmgr->pasid == p->pasid))
			kfd_dbgmgr_destroy(pdd->dev->dbgmgr);

		if (pdd->reset_wavefronts) {
			pr_warn("amdkfd: Resetting all wave fronts\n");
			dbgdev_wave_reset_wavefronts(pdd->dev, p);
			pdd->reset_wavefronts = false;
		}
	}

	mutex_unlock(&p->mutex);

	/*
	 * Because we drop mm_count inside kfd_process_destroy_delayed
	 * and because the mmu_notifier_unregister function also drops
	 * mm_count, we need to take an extra count here.
	 */
	atomic_inc(&p->mm->mm_count);
	mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
	mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
};

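/*
 * Allocate and initialize a new kfd_process for the given thread. Expects
 * kfd_processes_mutex and the mm's mmap_sem to be held, as done in
 * kfd_create_process().
 */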
static struct kfd_process *create_process(const struct task_struct *thread)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);

	if (!process)
		goto err_alloc_process;

	process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE,
					sizeof(process->queues[0]), GFP_KERNEL);
	if (!process->queues)
		goto err_alloc_queues;

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	mutex_init(&process->mutex);

	process->mm = thread->mm;

	/* register notifier */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = __mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_mmu_notifier;

	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
			(uintptr_t)process->mm);

	process->lead_thread = thread->group_leader;

	process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE;

	INIT_LIST_HEAD(&process->per_device_data);

	kfd_event_init_process(process);

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	/* init process apertures */
	process->is_32bit_user_mode = in_compat_syscall();
	if (kfd_init_apertures(process) != 0)
		goto err_init_apertures;

	return process;

err_init_apertures:
	pqm_uninit(&process->pqm);
err_process_pqm_init:
	hash_del_rcu(&process->kfd_processes);
	synchronize_rcu();
	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
err_mmu_notifier:
	mutex_destroy(&process->mutex);
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	kfree(process->queues);
err_alloc_queues:
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}

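/* Return the per-device data for dev in process p, or NULL if not found. */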
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	return NULL;
}

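/*
 * Allocate per-device data for dev, add it to the process's list and
 * return it, or NULL on allocation failure.
 */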
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (pdd != NULL) {
		pdd->dev = dev;
		INIT_LIST_HEAD(&pdd->qpd.queues_list);
		INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
		pdd->qpd.dqm = dev->dqm;
		pdd->reset_wavefronts = false;
		list_add(&pdd->per_device_list, &p->per_device_data);
	}

	return pdd;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int err;

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

	if (pdd->bound)
		return pdd;

	err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
	if (err < 0)
		return ERR_PTR(err);

	pdd->bound = true;

	return pdd;
}

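/*
 * Called when the IOMMU driver shuts down the pasid binding: marks the
 * per-device data as unbound but keeps it around until the process exits.
 */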
void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd;

	BUG_ON(dev == NULL);

	/*
	 * Look for the process that matches the pasid. If there is no such
	 * process, we either released it in amdkfd's own notifier, or there
	 * is a bug. Unfortunately, there is no way to tell...
	 */
	p = kfd_lookup_process_by_pasid(pasid);
	if (!p)
		return;

	pr_debug("Unbinding process %d from IOMMU\n", pasid);

	if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
		kfd_dbgmgr_destroy(dev->dbgmgr);

	pqm_uninit(&p->pqm);

	pdd = kfd_get_process_device_data(dev, p);

	if (!pdd) {
		mutex_unlock(&p->mutex);
		return;
	}

	if (pdd->reset_wavefronts) {
		dbgdev_wave_reset_wavefronts(pdd->dev, p);
		pdd->reset_wavefronts = false;
	}

	/*
	 * Just mark pdd as unbound, because we still need it
	 * to call amd_iommu_unbind_pasid() when the
	 * process exits.
	 * We don't call amd_iommu_unbind_pasid() here
	 * because the IOMMU called us.
	 */
	pdd->bound = false;

	mutex_unlock(&p->mutex);
}


struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;
	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !(list_empty(&p->per_device_data));
}

/* This returns with process->mutex locked. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
	struct kfd_process *p;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (p->pasid == pasid) {
			mutex_lock(&p->mutex);
			break;
		}
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}