1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #include <linux/mutex.h> 24 #include <linux/log2.h> 25 #include <linux/sched.h> 26 #include <linux/sched/mm.h> 27 #include <linux/slab.h> 28 #include <linux/amd-iommu.h> 29 #include <linux/notifier.h> 30 #include <linux/compat.h> 31 32 struct mm_struct; 33 34 #include "kfd_priv.h" 35 #include "kfd_dbgmgr.h" 36 37 /* 38 * List of struct kfd_process (field kfd_process). 39 * Unique/indexed by mm_struct* 40 */ 41 #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ 42 static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); 43 static DEFINE_MUTEX(kfd_processes_mutex); 44 45 DEFINE_STATIC_SRCU(kfd_processes_srcu); 46 47 static struct workqueue_struct *kfd_process_wq; 48 49 struct kfd_process_release_work { 50 struct work_struct kfd_work; 51 struct kfd_process *p; 52 }; 53 54 static struct kfd_process *find_process(const struct task_struct *thread); 55 static struct kfd_process *create_process(const struct task_struct *thread); 56 57 void kfd_process_create_wq(void) 58 { 59 if (!kfd_process_wq) 60 kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0); 61 } 62 63 void kfd_process_destroy_wq(void) 64 { 65 if (kfd_process_wq) { 66 destroy_workqueue(kfd_process_wq); 67 kfd_process_wq = NULL; 68 } 69 } 70 71 struct kfd_process *kfd_create_process(const struct task_struct *thread) 72 { 73 struct kfd_process *process; 74 75 if (!thread->mm) 76 return ERR_PTR(-EINVAL); 77 78 /* Only the pthreads threading model is supported. */ 79 if (thread->group_leader->mm != thread->mm) 80 return ERR_PTR(-EINVAL); 81 82 /* Take mmap_sem because we call __mmu_notifier_register inside */ 83 down_write(&thread->mm->mmap_sem); 84 85 /* 86 * take kfd processes mutex before starting of process creation 87 * so there won't be a case where two threads of the same process 88 * create two kfd_process structures 89 */ 90 mutex_lock(&kfd_processes_mutex); 91 92 /* A prior open of /dev/kfd could have already created the process. */ 93 process = find_process(thread); 94 if (process) 95 pr_debug("Process already found\n"); 96 97 if (!process) 98 process = create_process(thread); 99 100 mutex_unlock(&kfd_processes_mutex); 101 102 up_write(&thread->mm->mmap_sem); 103 104 return process; 105 } 106 107 struct kfd_process *kfd_get_process(const struct task_struct *thread) 108 { 109 struct kfd_process *process; 110 111 if (!thread->mm) 112 return ERR_PTR(-EINVAL); 113 114 /* Only the pthreads threading model is supported. */ 115 if (thread->group_leader->mm != thread->mm) 116 return ERR_PTR(-EINVAL); 117 118 process = find_process(thread); 119 120 return process; 121 } 122 123 static struct kfd_process *find_process_by_mm(const struct mm_struct *mm) 124 { 125 struct kfd_process *process; 126 127 hash_for_each_possible_rcu(kfd_processes_table, process, 128 kfd_processes, (uintptr_t)mm) 129 if (process->mm == mm) 130 return process; 131 132 return NULL; 133 } 134 135 static struct kfd_process *find_process(const struct task_struct *thread) 136 { 137 struct kfd_process *p; 138 int idx; 139 140 idx = srcu_read_lock(&kfd_processes_srcu); 141 p = find_process_by_mm(thread->mm); 142 srcu_read_unlock(&kfd_processes_srcu, idx); 143 144 return p; 145 } 146 147 static void kfd_process_wq_release(struct work_struct *work) 148 { 149 struct kfd_process_release_work *my_work; 150 struct kfd_process_device *pdd, *temp; 151 struct kfd_process *p; 152 153 my_work = (struct kfd_process_release_work *) work; 154 155 p = my_work->p; 156 157 pr_debug("Releasing process (pasid %d) in workqueue\n", 158 p->pasid); 159 160 mutex_lock(&p->mutex); 161 162 list_for_each_entry_safe(pdd, temp, &p->per_device_data, 163 per_device_list) { 164 pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n", 165 pdd->dev->id, p->pasid); 166 167 if (pdd->bound == PDD_BOUND) 168 amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); 169 170 list_del(&pdd->per_device_list); 171 kfree(pdd); 172 } 173 174 kfd_event_free_process(p); 175 176 kfd_pasid_free(p->pasid); 177 kfd_free_process_doorbells(p); 178 179 mutex_unlock(&p->mutex); 180 181 mutex_destroy(&p->mutex); 182 183 kfree(p); 184 185 kfree(work); 186 } 187 188 static void kfd_process_destroy_delayed(struct rcu_head *rcu) 189 { 190 struct kfd_process_release_work *work; 191 struct kfd_process *p; 192 193 p = container_of(rcu, struct kfd_process, rcu); 194 195 mmdrop(p->mm); 196 197 work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC); 198 199 if (work) { 200 INIT_WORK((struct work_struct *) work, kfd_process_wq_release); 201 work->p = p; 202 queue_work(kfd_process_wq, (struct work_struct *) work); 203 } 204 } 205 206 static void kfd_process_notifier_release(struct mmu_notifier *mn, 207 struct mm_struct *mm) 208 { 209 struct kfd_process *p; 210 struct kfd_process_device *pdd = NULL; 211 212 /* 213 * The kfd_process structure can not be free because the 214 * mmu_notifier srcu is read locked 215 */ 216 p = container_of(mn, struct kfd_process, mmu_notifier); 217 if (WARN_ON(p->mm != mm)) 218 return; 219 220 mutex_lock(&kfd_processes_mutex); 221 hash_del_rcu(&p->kfd_processes); 222 mutex_unlock(&kfd_processes_mutex); 223 synchronize_srcu(&kfd_processes_srcu); 224 225 mutex_lock(&p->mutex); 226 227 /* Iterate over all process device data structures and if the 228 * pdd is in debug mode, we should first force unregistration, 229 * then we will be able to destroy the queues 230 */ 231 list_for_each_entry(pdd, &p->per_device_data, per_device_list) { 232 struct kfd_dev *dev = pdd->dev; 233 234 mutex_lock(kfd_get_dbgmgr_mutex()); 235 if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { 236 if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { 237 kfd_dbgmgr_destroy(dev->dbgmgr); 238 dev->dbgmgr = NULL; 239 } 240 } 241 mutex_unlock(kfd_get_dbgmgr_mutex()); 242 } 243 244 kfd_process_dequeue_from_all_devices(p); 245 pqm_uninit(&p->pqm); 246 247 mutex_unlock(&p->mutex); 248 249 /* 250 * Because we drop mm_count inside kfd_process_destroy_delayed 251 * and because the mmu_notifier_unregister function also drop 252 * mm_count we need to take an extra count here. 253 */ 254 mmgrab(p->mm); 255 mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm); 256 mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed); 257 } 258 259 static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { 260 .release = kfd_process_notifier_release, 261 }; 262 263 static struct kfd_process *create_process(const struct task_struct *thread) 264 { 265 struct kfd_process *process; 266 int err = -ENOMEM; 267 268 process = kzalloc(sizeof(*process), GFP_KERNEL); 269 270 if (!process) 271 goto err_alloc_process; 272 273 process->pasid = kfd_pasid_alloc(); 274 if (process->pasid == 0) 275 goto err_alloc_pasid; 276 277 if (kfd_alloc_process_doorbells(process) < 0) 278 goto err_alloc_doorbells; 279 280 mutex_init(&process->mutex); 281 282 process->mm = thread->mm; 283 284 /* register notifier */ 285 process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops; 286 err = __mmu_notifier_register(&process->mmu_notifier, process->mm); 287 if (err) 288 goto err_mmu_notifier; 289 290 hash_add_rcu(kfd_processes_table, &process->kfd_processes, 291 (uintptr_t)process->mm); 292 293 process->lead_thread = thread->group_leader; 294 295 INIT_LIST_HEAD(&process->per_device_data); 296 297 kfd_event_init_process(process); 298 299 err = pqm_init(&process->pqm, process); 300 if (err != 0) 301 goto err_process_pqm_init; 302 303 /* init process apertures*/ 304 process->is_32bit_user_mode = in_compat_syscall(); 305 err = kfd_init_apertures(process); 306 if (err != 0) 307 goto err_init_apertures; 308 309 return process; 310 311 err_init_apertures: 312 pqm_uninit(&process->pqm); 313 err_process_pqm_init: 314 hash_del_rcu(&process->kfd_processes); 315 synchronize_rcu(); 316 mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm); 317 err_mmu_notifier: 318 mutex_destroy(&process->mutex); 319 kfd_free_process_doorbells(process); 320 err_alloc_doorbells: 321 kfd_pasid_free(process->pasid); 322 err_alloc_pasid: 323 kfree(process); 324 err_alloc_process: 325 return ERR_PTR(err); 326 } 327 328 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, 329 struct kfd_process *p) 330 { 331 struct kfd_process_device *pdd = NULL; 332 333 list_for_each_entry(pdd, &p->per_device_data, per_device_list) 334 if (pdd->dev == dev) 335 return pdd; 336 337 return NULL; 338 } 339 340 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, 341 struct kfd_process *p) 342 { 343 struct kfd_process_device *pdd = NULL; 344 345 pdd = kzalloc(sizeof(*pdd), GFP_KERNEL); 346 if (pdd != NULL) { 347 pdd->dev = dev; 348 INIT_LIST_HEAD(&pdd->qpd.queues_list); 349 INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); 350 pdd->qpd.dqm = dev->dqm; 351 pdd->process = p; 352 pdd->bound = PDD_UNBOUND; 353 pdd->already_dequeued = false; 354 list_add(&pdd->per_device_list, &p->per_device_data); 355 } 356 357 return pdd; 358 } 359 360 /* 361 * Direct the IOMMU to bind the process (specifically the pasid->mm) 362 * to the device. 363 * Unbinding occurs when the process dies or the device is removed. 364 * 365 * Assumes that the process lock is held. 366 */ 367 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, 368 struct kfd_process *p) 369 { 370 struct kfd_process_device *pdd; 371 int err; 372 373 pdd = kfd_get_process_device_data(dev, p); 374 if (!pdd) { 375 pr_err("Process device data doesn't exist\n"); 376 return ERR_PTR(-ENOMEM); 377 } 378 379 if (pdd->bound == PDD_BOUND) { 380 return pdd; 381 } else if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) { 382 pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n"); 383 return ERR_PTR(-EINVAL); 384 } 385 386 err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); 387 if (err < 0) 388 return ERR_PTR(err); 389 390 pdd->bound = PDD_BOUND; 391 392 return pdd; 393 } 394 395 /* 396 * Bind processes do the device that have been temporarily unbound 397 * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device. 398 */ 399 int kfd_bind_processes_to_device(struct kfd_dev *dev) 400 { 401 struct kfd_process_device *pdd; 402 struct kfd_process *p; 403 unsigned int temp; 404 int err = 0; 405 406 int idx = srcu_read_lock(&kfd_processes_srcu); 407 408 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { 409 mutex_lock(&p->mutex); 410 pdd = kfd_get_process_device_data(dev, p); 411 if (pdd->bound != PDD_BOUND_SUSPENDED) { 412 mutex_unlock(&p->mutex); 413 continue; 414 } 415 416 err = amd_iommu_bind_pasid(dev->pdev, p->pasid, 417 p->lead_thread); 418 if (err < 0) { 419 pr_err("Unexpected pasid %d binding failure\n", 420 p->pasid); 421 mutex_unlock(&p->mutex); 422 break; 423 } 424 425 pdd->bound = PDD_BOUND; 426 mutex_unlock(&p->mutex); 427 } 428 429 srcu_read_unlock(&kfd_processes_srcu, idx); 430 431 return err; 432 } 433 434 /* 435 * Mark currently bound processes as PDD_BOUND_SUSPENDED. These 436 * processes will be restored to PDD_BOUND state in 437 * kfd_bind_processes_to_device. 438 */ 439 void kfd_unbind_processes_from_device(struct kfd_dev *dev) 440 { 441 struct kfd_process_device *pdd; 442 struct kfd_process *p; 443 unsigned int temp; 444 445 int idx = srcu_read_lock(&kfd_processes_srcu); 446 447 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { 448 mutex_lock(&p->mutex); 449 pdd = kfd_get_process_device_data(dev, p); 450 451 if (pdd->bound == PDD_BOUND) 452 pdd->bound = PDD_BOUND_SUSPENDED; 453 mutex_unlock(&p->mutex); 454 } 455 456 srcu_read_unlock(&kfd_processes_srcu, idx); 457 } 458 459 void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) 460 { 461 struct kfd_process *p; 462 struct kfd_process_device *pdd; 463 464 /* 465 * Look for the process that matches the pasid. If there is no such 466 * process, we either released it in amdkfd's own notifier, or there 467 * is a bug. Unfortunately, there is no way to tell... 468 */ 469 p = kfd_lookup_process_by_pasid(pasid); 470 if (!p) 471 return; 472 473 pr_debug("Unbinding process %d from IOMMU\n", pasid); 474 475 mutex_lock(kfd_get_dbgmgr_mutex()); 476 477 if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { 478 if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { 479 kfd_dbgmgr_destroy(dev->dbgmgr); 480 dev->dbgmgr = NULL; 481 } 482 } 483 484 mutex_unlock(kfd_get_dbgmgr_mutex()); 485 486 pdd = kfd_get_process_device_data(dev, p); 487 if (pdd) 488 /* For GPU relying on IOMMU, we need to dequeue here 489 * when PASID is still bound. 490 */ 491 kfd_process_dequeue_from_device(pdd); 492 493 mutex_unlock(&p->mutex); 494 } 495 496 struct kfd_process_device *kfd_get_first_process_device_data( 497 struct kfd_process *p) 498 { 499 return list_first_entry(&p->per_device_data, 500 struct kfd_process_device, 501 per_device_list); 502 } 503 504 struct kfd_process_device *kfd_get_next_process_device_data( 505 struct kfd_process *p, 506 struct kfd_process_device *pdd) 507 { 508 if (list_is_last(&pdd->per_device_list, &p->per_device_data)) 509 return NULL; 510 return list_next_entry(pdd, per_device_list); 511 } 512 513 bool kfd_has_process_device_data(struct kfd_process *p) 514 { 515 return !(list_empty(&p->per_device_data)); 516 } 517 518 /* This returns with process->mutex locked. */ 519 struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) 520 { 521 struct kfd_process *p; 522 unsigned int temp; 523 524 int idx = srcu_read_lock(&kfd_processes_srcu); 525 526 hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { 527 if (p->pasid == pasid) { 528 mutex_lock(&p->mutex); 529 break; 530 } 531 } 532 533 srcu_read_unlock(&kfd_processes_srcu, idx); 534 535 return p; 536 } 537