/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_dbgmgr.h"

/*
 * Initial size for the array of queues.
 * The allocated size is doubled each time
 * it is exceeded up to MAX_PROCESS_QUEUES.
 */
#define INITIAL_QUEUE_ARRAY_SIZE 16

/*
 * List of struct kfd_process (field kfd_process).
 * Unique/indexed by mm_struct*
 */
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_STATIC_SRCU(kfd_processes_srcu);

static struct workqueue_struct *kfd_process_wq;

struct kfd_process_release_work {
	struct work_struct kfd_work;
	struct kfd_process *p;
};

static struct kfd_process *find_process(const struct task_struct *thread);
static struct kfd_process *create_process(const struct task_struct *thread);

void kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
}

void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
}

struct kfd_process *kfd_create_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/* Take mmap_sem because we call __mmu_notifier_register inside */
	down_write(&thread->mm->mmap_sem);

	/*
	 * Take the kfd processes mutex before starting process creation
	 * so there won't be a case where two threads of the same process
	 * create two kfd_process structures.
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process)
		pr_debug("Process already found\n");

	if (!process)
		process = create_process(thread);

	mutex_unlock(&kfd_processes_mutex);

	up_write(&thread->mm->mmap_sem);

	return process;
}

struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);

	return process;
}

static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
					kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process_release_work *my_work;
	struct kfd_process_device *pdd, *temp;
	struct kfd_process *p;

	my_work = (struct kfd_process_release_work *) work;

	p = my_work->p;

	pr_debug("Releasing process (pasid %d) in workqueue\n",
			p->pasid);

	mutex_lock(&p->mutex);

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
							per_device_list) {
		pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
				pdd->dev->id, p->pasid);

		if (pdd->reset_wavefronts)
			dbgdev_wave_reset_wavefronts(pdd->dev, p);

		amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
		list_del(&pdd->per_device_list);

		kfree(pdd);
	}

	kfd_event_free_process(p);

	kfd_pasid_free(p->pasid);

	mutex_unlock(&p->mutex);

	mutex_destroy(&p->mutex);

	kfree(p->queues);

	kfree(p);

	kfree(work);
}

static void kfd_process_destroy_delayed(struct rcu_head *rcu)
{
	struct kfd_process_release_work *work;
	struct kfd_process *p;

	p = container_of(rcu, struct kfd_process, rcu);
	WARN_ON(atomic_read(&p->mm->mm_count) <= 0);

	mmdrop(p->mm);

	work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);

	if (work) {
		INIT_WORK((struct work_struct *) work, kfd_process_wq_release);
		work->p = p;
		queue_work(kfd_process_wq, (struct work_struct *) work);
	}
}

static void kfd_process_notifier_release(struct mmu_notifier *mn,
					struct mm_struct *mm)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;

	/*
	 * The kfd_process structure cannot be freed because the
	 * mmu_notifier srcu is read locked.
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	if (WARN_ON(p->mm != mm))
		return;

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	mutex_lock(&p->mutex);

	/* In case our notifier is called before the IOMMU notifier */
	pqm_uninit(&p->pqm);

	/*
	 * Iterate over all process device data structures and check
	 * if we should delete debug managers and reset all wavefronts.
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		if ((pdd->dev->dbgmgr) &&
				(pdd->dev->dbgmgr->pasid == p->pasid))
			kfd_dbgmgr_destroy(pdd->dev->dbgmgr);

		if (pdd->reset_wavefronts) {
			pr_warn("Resetting all wave fronts\n");
			dbgdev_wave_reset_wavefronts(pdd->dev, p);
			pdd->reset_wavefronts = false;
		}
	}

	mutex_unlock(&p->mutex);

	/*
	 * Because we drop mm_count inside kfd_process_destroy_delayed
	 * and because the mmu_notifier_unregister function also drops
	 * mm_count, we need to take an extra count here.
	 */
	mmgrab(p->mm);
	mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
	mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
};

static struct kfd_process *create_process(const struct task_struct *thread)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);

	if (!process)
		goto err_alloc_process;

	process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE,
					sizeof(process->queues[0]), GFP_KERNEL);
	if (!process->queues)
		goto err_alloc_queues;

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	mutex_init(&process->mutex);

	process->mm = thread->mm;

	/* register notifier */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = __mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_mmu_notifier;

	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
			(uintptr_t)process->mm);

	process->lead_thread = thread->group_leader;

	process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE;

	INIT_LIST_HEAD(&process->per_device_data);

	kfd_event_init_process(process);

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	/* init process apertures */
	process->is_32bit_user_mode = in_compat_syscall();
	err = kfd_init_apertures(process);
	if (err != 0)
		goto err_init_apertures;

	return process;

err_init_apertures:
	pqm_uninit(&process->pqm);
err_process_pqm_init:
	hash_del_rcu(&process->kfd_processes);
	synchronize_rcu();
	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
err_mmu_notifier:
	mutex_destroy(&process->mutex);
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	kfree(process->queues);
err_alloc_queues:
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}

struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			break;

	return pdd;
}

struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (pdd != NULL) {
		pdd->dev = dev;
		INIT_LIST_HEAD(&pdd->qpd.queues_list);
		INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
		pdd->qpd.dqm = dev->dqm;
		pdd->reset_wavefronts = false;
		list_add(&pdd->per_device_list, &p->per_device_data);
	}

	return pdd;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int err;

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

	if (pdd->bound)
		return pdd;

	err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
	if (err < 0)
		return ERR_PTR(err);

	pdd->bound = true;

	return pdd;
}

void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd;

	/*
	 * Look for the process that matches the pasid. If there is no such
	 * process, we either released it in amdkfd's own notifier, or there
	 * is a bug. Unfortunately, there is no way to tell...
	 */
	p = kfd_lookup_process_by_pasid(pasid);
	if (!p)
		return;

	pr_debug("Unbinding process %d from IOMMU\n", pasid);

	if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
		kfd_dbgmgr_destroy(dev->dbgmgr);

	pqm_uninit(&p->pqm);

	pdd = kfd_get_process_device_data(dev, p);

	if (!pdd) {
		mutex_unlock(&p->mutex);
		return;
	}

	if (pdd->reset_wavefronts) {
		dbgdev_wave_reset_wavefronts(pdd->dev, p);
		pdd->reset_wavefronts = false;
	}

	/*
	 * Just mark pdd as unbound, because we still need it
	 * to call amd_iommu_unbind_pasid() when the process exits.
	 * We don't call amd_iommu_unbind_pasid() here
	 * because the IOMMU called us.
	 */
	pdd->bound = false;

	mutex_unlock(&p->mutex);
}

struct kfd_process_device *kfd_get_first_process_device_data(
						struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;
	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !(list_empty(&p->per_device_data));
}

/* This returns with process->mutex locked. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
	struct kfd_process *p;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (p->pasid == pasid) {
			mutex_lock(&p->mutex);
			break;
		}
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}