/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>
#include <linux/file.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu.h"

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"

/*
 * List of struct kfd_process (field kfd_processes).
 * Unique/indexed by mm_struct*
 */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_SRCU(kfd_processes_srcu);
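/* Writers of kfd_processes_table hold kfd_processes_mutex. Lookups run
 * under the kfd_processes_srcu read lock instead (see find_process()),
 * and kfd_process_notifier_release() calls synchronize_srcu() after
 * removing an entry.
 */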
/* For process termination handling */
static struct workqueue_struct *kfd_process_wq;

/* Ordered, single-threaded workqueue for restoring evicted
 * processes. Restoring multiple processes concurrently under memory
 * pressure can lead to processes blocking each other from validating
 * their BOs and result in a live-lock situation where processes
 * remain evicted indefinitely.
 */
static struct workqueue_struct *kfd_restore_wq;

static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread);
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);

static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);

struct kfd_procfs_tree {
	struct kobject *kobj;
};

static struct kfd_procfs_tree procfs;

static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
			       char *buffer)
{
	int val = 0;

	if (strcmp(attr->name, "pasid") == 0) {
		struct kfd_process *p = container_of(attr, struct kfd_process,
						     attr_pasid);
		val = p->pasid;
	} else {
		pr_err("Invalid attribute\n");
		return -EINVAL;
	}

	return snprintf(buffer, PAGE_SIZE, "%d\n", val);
}

static void kfd_procfs_kobj_release(struct kobject *kobj)
{
	kfree(kobj);
}

static const struct sysfs_ops kfd_procfs_ops = {
	.show = kfd_procfs_show,
};

static struct kobj_type procfs_type = {
	.release = kfd_procfs_kobj_release,
	.sysfs_ops = &kfd_procfs_ops,
};

void kfd_procfs_init(void)
{
	int ret = 0;

	procfs.kobj = kfd_alloc_struct(procfs.kobj);
	if (!procfs.kobj)
		return;

	ret = kobject_init_and_add(procfs.kobj, &procfs_type,
				   &kfd_device->kobj, "proc");
	if (ret) {
		pr_warn("Could not create procfs proc folder\n");
		/* If we fail to create the procfs, clean up */
		kfd_procfs_shutdown();
	}
}

void kfd_procfs_shutdown(void)
{
	if (procfs.kobj) {
		kobject_del(procfs.kobj);
		kobject_put(procfs.kobj);
		procfs.kobj = NULL;
	}
}

int kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
	if (!kfd_restore_wq)
		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);

	if (!kfd_process_wq || !kfd_restore_wq) {
		kfd_process_destroy_wq();
		return -ENOMEM;
	}

	return 0;
}

void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
	if (kfd_restore_wq) {
		destroy_workqueue(kfd_restore_wq);
		kfd_restore_wq = NULL;
	}
}
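/* kfd_process_free_gpuvm - Unmap @mem from the device VM of @pdd and
 * free it. Used on the error paths of kfd_process_alloc_gpuvm() below.
 */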
static void kfd_process_free_gpuvm(struct kgd_mem *mem,
				   struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem);
}

/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
 * This function should be called only right after the process
 * is created and when kfd_processes_mutex is still being held
 * to avoid concurrency. Because of that exclusiveness, we do
 * not need to take p->mutex.
 */
static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
				   uint64_t gpu_va, uint32_t size,
				   uint32_t flags, void **kptr)
{
	struct kfd_dev *kdev = pdd->dev;
	struct kgd_mem *mem = NULL;
	int handle;
	int err;

	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
						      pdd->vm, &mem, NULL, flags);
	if (err)
		goto err_alloc_mem;

	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
	if (err)
		goto err_map_mem;

	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Create an obj handle so kfd_process_device_remove_obj_handle
	 * will take care of the bo removal when the process finishes.
	 * We do not need to take p->mutex, because the process is just
	 * created and the ioctls have not had the chance to run.
	 */
	handle = kfd_process_device_create_obj_handle(pdd, mem);

	if (handle < 0) {
		err = handle;
		goto free_gpuvm;
	}

	if (kptr) {
		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
				(struct kgd_mem *)mem, kptr, NULL);
		if (err) {
			pr_debug("Map GTT BO to kernel failed\n");
			goto free_obj_handle;
		}
	}

	return err;

free_obj_handle:
	kfd_process_device_remove_obj_handle(pdd, handle);
free_gpuvm:
sync_memory_failed:
	kfd_process_free_gpuvm(mem, pdd);
	return err;

err_map_mem:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem);
err_alloc_mem:
	*kptr = NULL;
	return err;
}

/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
 * process for IB usage. The memory reserved is for KFD to submit
 * IBs to AMDGPU from kernel. If the memory is reserved
 * successfully, ib_kaddr will have the CPU/kernel
 * address. Check ib_kaddr before accessing the memory.
 */
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
			 ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
			 ALLOC_MEM_FLAGS_WRITABLE |
			 ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (qpd->ib_kaddr || !qpd->ib_base)
		return 0;

	/* ib_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
				      &kaddr);
	if (ret)
		return ret;

	qpd->ib_kaddr = kaddr;

	return 0;
}
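/* kfd_create_process - Find or create the kfd_process for the calling
 * task. Called when a process opens /dev/kfd. Returns the process with
 * an extra reference taken; the caller is responsible for dropping it
 * with kfd_unref_process().
 */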
struct kfd_process *kfd_create_process(struct file *filep)
{
	struct kfd_process *process;
	struct task_struct *thread = current;
	int ret;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/*
	 * Take the kfd processes mutex before starting process creation
	 * so there won't be a case where two threads of the same process
	 * create two kfd_process structures.
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process) {
		pr_debug("Process already found\n");
	} else {
		process = create_process(thread);
		if (IS_ERR(process))
			goto out;

		ret = kfd_process_init_cwsr_apu(process, filep);
		if (ret) {
			process = ERR_PTR(ret);
			goto out;
		}

		if (!procfs.kobj)
			goto out;

		process->kobj = kfd_alloc_struct(process->kobj);
		if (!process->kobj) {
			pr_warn("Creating procfs kobject failed\n");
			goto out;
		}
		ret = kobject_init_and_add(process->kobj, &procfs_type,
					   procfs.kobj, "%d",
					   (int)process->lead_thread->pid);
		if (ret) {
			pr_warn("Creating procfs pid directory failed\n");
			goto out;
		}

		process->attr_pasid.name = "pasid";
		process->attr_pasid.mode = KFD_SYSFS_FILE_MODE;
		sysfs_attr_init(&process->attr_pasid);
		ret = sysfs_create_file(process->kobj, &process->attr_pasid);
		if (ret)
			pr_warn("Creating pasid for pid %d failed\n",
				(int)process->lead_thread->pid);
	}
out:
	if (!IS_ERR(process))
		kref_get(&process->ref);
	mutex_unlock(&kfd_processes_mutex);

	return process;
}

struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);
	if (!process)
		return ERR_PTR(-EINVAL);

	return process;
}

static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
				   kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

void kfd_unref_process(struct kfd_process *p)
{
	kref_put(&p->ref, kfd_process_ref_release);
}

static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
	struct kfd_process *p = pdd->process;
	void *mem;
	int id;

	/*
	 * Remove all handles from the idr and release the appropriate
	 * local memory object.
	 */
	idr_for_each_entry(&pdd->alloc_idr, mem, id) {
		struct kfd_process_device *peer_pdd;

		list_for_each_entry(peer_pdd, &p->per_device_data,
				    per_device_list) {
			if (!peer_pdd->vm)
				continue;
			amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
				peer_pdd->dev->kgd, mem, peer_pdd->vm);
		}

		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem);
		kfd_process_device_remove_obj_handle(pdd, id);
	}
}

static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		kfd_process_device_free_bos(pdd);
}
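/* kfd_process_destroy_pdds - Free all per-device data of a process.
 *
 * Releases or destroys each device VM (depending on whether it was
 * acquired from a DRM file descriptor), frees the CWSR pages allocated
 * for APUs, the doorbell bitmap and the allocation IDR, and finally the
 * pdd itself.
 */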
static void kfd_process_destroy_pdds(struct kfd_process *p)
{
	struct kfd_process_device *pdd, *temp;

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
				 per_device_list) {
		pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
			 pdd->dev->id, p->pasid);

		if (pdd->drm_file) {
			amdgpu_amdkfd_gpuvm_release_process_vm(
					pdd->dev->kgd, pdd->vm);
			fput(pdd->drm_file);
		} else if (pdd->vm)
			amdgpu_amdkfd_gpuvm_destroy_process_vm(
					pdd->dev->kgd, pdd->vm);

		list_del(&pdd->per_device_list);

		if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
				   get_order(KFD_CWSR_TBA_TMA_SIZE));

		kfree(pdd->qpd.doorbell_bitmap);
		idr_destroy(&pdd->alloc_idr);

		kfree(pdd);
	}
}

/* No process locking is needed in this function, because the process
 * is not findable any more. We must assume that no other thread is
 * using it any more, otherwise we couldn't safely free the process
 * structure in the end.
 */
static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process *p = container_of(work, struct kfd_process,
					     release_work);

	/* Remove the procfs files */
	if (p->kobj) {
		sysfs_remove_file(p->kobj, &p->attr_pasid);
		kobject_del(p->kobj);
		kobject_put(p->kobj);
		p->kobj = NULL;
	}

	kfd_iommu_unbind_process(p);

	kfd_process_free_outstanding_kfd_bos(p);

	kfd_process_destroy_pdds(p);
	dma_fence_put(p->ef);

	kfd_event_free_process(p);

	kfd_pasid_free(p->pasid);
	kfd_free_process_doorbells(p);

	mutex_destroy(&p->mutex);

	put_task_struct(p->lead_thread);

	kfree(p);
}

static void kfd_process_ref_release(struct kref *ref)
{
	struct kfd_process *p = container_of(ref, struct kfd_process, ref);

	INIT_WORK(&p->release_work, kfd_process_wq_release);
	queue_work(kfd_process_wq, &p->release_work);
}

static void kfd_process_free_notifier(struct mmu_notifier *mn)
{
	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
}

static void kfd_process_notifier_release(struct mmu_notifier *mn,
					 struct mm_struct *mm)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;

	/*
	 * The kfd_process structure cannot be freed because the
	 * mmu_notifier srcu is read-locked.
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	if (WARN_ON(p->mm != mm))
		return;

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	cancel_delayed_work_sync(&p->eviction_work);
	cancel_delayed_work_sync(&p->restore_work);

	mutex_lock(&p->mutex);

	/* Iterate over all process device data structures and if the
	 * pdd is in debug mode, we should first force unregistration,
	 * then we will be able to destroy the queues.
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;

		mutex_lock(kfd_get_dbgmgr_mutex());
		if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
			if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
				kfd_dbgmgr_destroy(dev->dbgmgr);
				dev->dbgmgr = NULL;
			}
		}
		mutex_unlock(kfd_get_dbgmgr_mutex());
	}

	kfd_process_dequeue_from_all_devices(p);
	pqm_uninit(&p->pqm);

	/* Indicate to other users that MM is no longer valid */
	p->mm = NULL;

	mutex_unlock(&p->mutex);

	mmu_notifier_put(&p->mmu_notifier);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
	.free_notifier = kfd_process_free_notifier,
};
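/* kfd_process_init_cwsr_apu - Set up compute wave save/restore (CWSR)
 * trampolines for devices without a dGPU CWSR base.
 *
 * For each CWSR-capable device, map the reserved-memory region through
 * the /dev/kfd file (vm_mmap) and copy the CWSR trampoline ISA into it.
 * The dGPU counterpart is kfd_process_device_init_cwsr_dgpu() below.
 */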
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
	unsigned long offset;
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;
		struct qcm_process_device *qpd = &pdd->qpd;

		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
			continue;

		offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
			MAP_SHARED, offset);

		if (IS_ERR_VALUE(qpd->tba_addr)) {
			int err = qpd->tba_addr;

			pr_err("Failure to set tba address. error %d.\n", err);
			qpd->tba_addr = 0;
			qpd->cwsr_kaddr = NULL;
			return err;
		}

		memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
		pr_debug("set tba: 0x%llx, tma: 0x%llx, cwsr_kaddr: %p for pqm.\n",
			 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
	}

	return 0;
}

static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
			 ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
			 ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
		return 0;

	/* cwsr_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
				      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
	if (ret)
		return ret;

	qpd->cwsr_kaddr = kaddr;
	qpd->tba_addr = qpd->cwsr_base;

	memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
	pr_debug("set tba: 0x%llx, tma: 0x%llx, cwsr_kaddr: %p for pqm.\n",
		 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);

	return 0;
}

/*
 * On return the kfd_process is fully operational and will be freed when the
 * mm is released.
 */
static struct kfd_process *create_process(const struct task_struct *thread)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);
	if (!process)
		goto err_alloc_process;

	kref_init(&process->ref);
	mutex_init(&process->mutex);
	process->mm = thread->mm;
	process->lead_thread = thread->group_leader;
	INIT_LIST_HEAD(&process->per_device_data);
	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
	process->last_restore_timestamp = get_jiffies_64();
	kfd_event_init_process(process);
	process->is_32bit_user_mode = in_compat_syscall();

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	if (kfd_alloc_process_doorbells(process) < 0)
		goto err_alloc_doorbells;

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	/* init process apertures */
	err = kfd_init_apertures(process);
	if (err != 0)
		goto err_init_apertures;

	/* Must be last, have to use release destruction after this */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_register_notifier;

	get_task_struct(process->lead_thread);
	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
		     (uintptr_t)process->mm);

	return process;

err_register_notifier:
	kfd_process_free_outstanding_kfd_bos(process);
	kfd_process_destroy_pdds(process);
err_init_apertures:
	pqm_uninit(&process->pqm);
err_process_pqm_init:
	kfd_free_process_doorbells(process);
err_alloc_doorbells:
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	mutex_destroy(&process->mutex);
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}
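/* init_doorbell_bitmap - Reserve doorbells that user queues must not use.
 *
 * On SOC15 ASICs, some doorbells in each process doorbell page are used
 * by SDMA, IH and VCN. Mark them (and their mirrored counterparts at
 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET) in the bitmap so user queue doorbell
 * assignment skips them.
 */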
static int init_doorbell_bitmap(struct qcm_process_device *qpd,
				struct kfd_dev *dev)
{
	unsigned int i;
	int range_start = dev->shared_resources.non_cp_doorbells_start;
	int range_end = dev->shared_resources.non_cp_doorbells_end;

	if (!KFD_IS_SOC15(dev->device_info->asic_family))
		return 0;

	qpd->doorbell_bitmap =
		kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
				     BITS_PER_BYTE), GFP_KERNEL);
	if (!qpd->doorbell_bitmap)
		return -ENOMEM;

	/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
		 range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
		 range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);

	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
		if (i >= range_start && i <= range_end) {
			set_bit(i, qpd->doorbell_bitmap);
			set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
				qpd->doorbell_bitmap);
		}
	}

	return 0;
}

struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	return NULL;
}
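/* kfd_create_process_device_data - Allocate the per-device data (pdd)
 * that links @p to @dev.
 *
 * The new pdd starts out unbound, with an empty queue list, a doorbell
 * bitmap initialized by init_doorbell_bitmap() and an IDR for memory
 * handle translation. It is freed in kfd_process_destroy_pdds().
 */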
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							   struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (!pdd)
		return NULL;

	if (init_doorbell_bitmap(&pdd->qpd, dev)) {
		pr_err("Failed to init doorbell for process\n");
		kfree(pdd);
		return NULL;
	}

	pdd->dev = dev;
	INIT_LIST_HEAD(&pdd->qpd.queues_list);
	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
	pdd->qpd.dqm = dev->dqm;
	pdd->qpd.pqm = &p->pqm;
	pdd->qpd.evicted = 0;
	pdd->process = p;
	pdd->bound = PDD_UNBOUND;
	pdd->already_dequeued = false;
	list_add(&pdd->per_device_list, &p->per_device_data);

	/* Init idr used for memory handle translation */
	idr_init(&pdd->alloc_idr);

	return pdd;
}

/**
 * kfd_process_device_init_vm - Initialize a VM for a process-device
 *
 * @pdd: The process-device
 * @drm_file: Optional pointer to a DRM file descriptor
 *
 * If @drm_file is specified, it will be used to acquire the VM from
 * that file descriptor. If successful, the @pdd takes ownership of
 * the file descriptor.
 *
 * If @drm_file is NULL, a new VM is created.
 *
 * Returns 0 on success, -errno on failure.
 */
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
			       struct file *drm_file)
{
	struct kfd_process *p;
	struct kfd_dev *dev;
	int ret;

	if (pdd->vm)
		return drm_file ? -EBUSY : 0;

	p = pdd->process;
	dev = pdd->dev;

	if (drm_file)
		ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
			dev->kgd, drm_file, p->pasid,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	else
		ret = amdgpu_amdkfd_gpuvm_create_process_vm(dev->kgd, p->pasid,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	if (ret) {
		pr_err("Failed to create process VM object\n");
		return ret;
	}

	amdgpu_vm_set_task_info(pdd->vm);

	ret = kfd_process_device_reserve_ib_mem(pdd);
	if (ret)
		goto err_reserve_ib_mem;
	ret = kfd_process_device_init_cwsr_dgpu(pdd);
	if (ret)
		goto err_init_cwsr;

	pdd->drm_file = drm_file;

	return 0;

err_init_cwsr:
err_reserve_ib_mem:
	kfd_process_device_free_bos(pdd);
	if (!drm_file)
		amdgpu_amdkfd_gpuvm_destroy_process_vm(dev->kgd, pdd->vm);
	pdd->vm = NULL;

	return ret;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
						      struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int err;

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

	err = kfd_iommu_bind_process_to_device(pdd);
	if (err)
		return ERR_PTR(err);

	err = kfd_process_device_init_vm(pdd, NULL);
	if (err)
		return ERR_PTR(err);

	return pdd;
}
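/* Iterators over the per-device data (pdd) list of a process.
 * kfd_get_next_process_device_data() returns NULL when the end of the
 * list is reached.
 */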
struct kfd_process_device *kfd_get_first_process_device_data(
						struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;
	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !(list_empty(&p->per_device_data));
}

/* Create a specific handle mapped to mem from the process local memory idr
 * Assumes that the process lock is held.
 */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
					 void *mem)
{
	return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
}

/* Translate a specific handle from the process local memory idr
 * Assumes that the process lock is held.
 */
void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
					  int handle)
{
	if (handle < 0)
		return NULL;

	return idr_find(&pdd->alloc_idr, handle);
}

/* Remove a specific handle from the process local memory idr
 * Assumes that the process lock is held.
 */
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
					  int handle)
{
	if (handle >= 0)
		idr_remove(&pdd->alloc_idr, handle);
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
	struct kfd_process *p, *ret_p = NULL;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (p->pasid == pasid) {
			kref_get(&p->ref);
			ret_p = p;
			break;
		}
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return ret_p;
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *p;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	p = find_process_by_mm(mm);
	if (p)
		kref_get(&p->ref);

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

/* kfd_process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
int kfd_process_evict_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r = 0;
	unsigned int n_evicted = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
							    &pdd->qpd);
		if (r) {
			pr_err("Failed to evict process queues\n");
			goto fail;
		}
		n_evicted++;
	}

	return r;

fail:
	/* To keep state consistent, roll back partial eviction by
	 * restoring queues.
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		if (n_evicted == 0)
			break;
		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd))
			pr_err("Failed to restore queues\n");

		n_evicted--;
	}

	return r;
}

/* kfd_process_restore_queues - Restore all user queues of a process */
int kfd_process_restore_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r, ret = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd);
		if (r) {
			pr_err("Failed to restore process queues\n");
			if (!ret)
				ret = r;
		}
	}

	return ret;
}
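/* evict_process_worker - Evict all user queues of a process and schedule
 * its restore.
 *
 * Flushes any pending restore work first, then evicts the queues,
 * signals and drops the eviction fence and queues restore_process_worker
 * to run after PROCESS_RESTORE_TIME_MS.
 */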
static void evict_process_worker(struct work_struct *work)
{
	int ret;
	struct kfd_process *p;
	struct delayed_work *dwork;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid.
	 */
	p = container_of(dwork, struct kfd_process, eviction_work);
	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
		  "Eviction fence mismatch\n");

	/* A narrow window of overlap between the restore and evict work
	 * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
	 * unreserves KFD BOs, it is possible to be evicted again, but
	 * restore has a few more steps to finish. So let's wait for any
	 * previous restore work to complete.
	 */
	flush_delayed_work(&p->restore_work);

	pr_debug("Started evicting pasid 0x%x\n", p->pasid);
	ret = kfd_process_evict_queues(p);
	if (!ret) {
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
		queue_delayed_work(kfd_restore_wq, &p->restore_work,
				   msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));

		pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
	} else
		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
}

static void restore_process_worker(struct work_struct *work)
{
	struct delayed_work *dwork;
	struct kfd_process *p;
	int ret = 0;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid.
	 */
	p = container_of(dwork, struct kfd_process, restore_work);
	pr_debug("Started restoring pasid 0x%x\n", p->pasid);

	/* Set last_restore_timestamp before the restoration has actually
	 * succeeded. Otherwise it would have to be set by KGD
	 * (restore_process_bos) before the KFD BOs are unreserved, or the
	 * process could be evicted again before the timestamp is set.
	 * If the restore fails, the timestamp is set again on the next
	 * attempt. This means the minimum GPU quantum would be
	 * PROCESS_ACTIVE_TIME_MS minus the time to execute the following
	 * two functions.
	 */

	p->last_restore_timestamp = get_jiffies_64();
	ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
						      &p->ef);
	if (ret) {
		pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
		WARN(!ret, "reschedule restore work failed\n");
		return;
	}

	ret = kfd_process_restore_queues(p);
	if (!ret)
		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
	else
		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
}

void kfd_suspend_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		cancel_delayed_work_sync(&p->eviction_work);
		cancel_delayed_work_sync(&p->restore_work);

		if (kfd_process_evict_queues(p))
			pr_err("Failed to suspend process 0x%x\n", p->pasid);
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
}

int kfd_resume_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
			pr_err("Restore process %d failed during resume\n",
			       p->pasid);
			ret = -EFAULT;
		}
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
	return ret;
}
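/* kfd_reserved_mem_mmap - Back the CWSR reserved-memory mapping with pages.
 *
 * Allocates the per-process CWSR buffer (zeroed pages covering
 * KFD_CWSR_TBA_TMA_SIZE) and maps it into the user VMA created by the
 * vm_mmap() call in kfd_process_init_cwsr_apu().
 */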
int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
			  struct vm_area_struct *vma)
{
	struct kfd_process_device *pdd;
	struct qcm_process_device *qpd;

	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
		pr_err("Incorrect CWSR mapping size.\n");
		return -EINVAL;
	}

	pdd = kfd_get_process_device_data(dev, process);
	if (!pdd)
		return -EINVAL;
	qpd = &pdd->qpd;

	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
						   get_order(KFD_CWSR_TBA_TMA_SIZE));
	if (!qpd->cwsr_kaddr) {
		pr_err("Error allocating per process CWSR buffer.\n");
		return -ENOMEM;
	}

	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
	/* Mapping pages to user process */
	return remap_pfn_range(vma, vma->vm_start,
			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}

void kfd_flush_tlb(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		/* Nothing to flush until a VMID is assigned, which
		 * only happens when the first queue is created.
		 */
		if (pdd->qpd.vmid)
			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
							 pdd->qpd.vmid);
	} else {
		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
						  pdd->process->pasid);
	}
}

#if defined(CONFIG_DEBUG_FS)

int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
{
	struct kfd_process *p;
	unsigned int temp;
	int r = 0;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		seq_printf(m, "Process %d PASID 0x%x:\n",
			   p->lead_thread->tgid, p->pasid);

		mutex_lock(&p->mutex);
		r = pqm_debugfs_mqds(m, &p->pqm);
		mutex_unlock(&p->mutex);

		if (r)
			break;
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return r;
}

#endif