/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/mmu_context.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu.h"

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"

/*
 * List of struct kfd_process (field kfd_process).
 * Unique/indexed by mm_struct*
 */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_SRCU(kfd_processes_srcu);

/* For process termination handling */
static struct workqueue_struct *kfd_process_wq;

/* Ordered, single-threaded workqueue for restoring evicted
 * processes. Restoring multiple processes concurrently under memory
 * pressure can lead to processes blocking each other from validating
 * their BOs and result in a live-lock situation where processes
 * remain evicted indefinitely.
 */
static struct workqueue_struct *kfd_restore_wq;

static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread);
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);

static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);

struct kfd_procfs_tree {
	struct kobject *kobj;
};

static struct kfd_procfs_tree procfs;

/*
 * Structure for SDMA activity tracking
 */
struct kfd_sdma_activity_handler_workarea {
	struct work_struct sdma_activity_work;
	struct kfd_process_device *pdd;
	uint64_t sdma_activity_counter;
};

static void kfd_sdma_activity_worker(struct work_struct *work)
{
	struct kfd_sdma_activity_handler_workarea *workarea;
	struct kfd_process_device *pdd;
	uint64_t val;
	struct mm_struct *mm;
	struct queue *q;
	struct qcm_process_device *qpd;
	struct device_queue_manager *dqm;
	int ret = 0;

	workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
				sdma_activity_work);
	if (!workarea)
		return;

	pdd = workarea->pdd;
	if (!pdd)
		return;
	dqm = pdd->dev->dqm;
	qpd = &pdd->qpd;
	if (!dqm || !qpd)
		return;

	mm = get_task_mm(pdd->process->lead_thread);
	if (!mm)
		return;

	use_mm(mm);

	dqm_lock(dqm);

	/*
	 * Total SDMA activity is current SDMA activity + past SDMA activity
	 */
	workarea->sdma_activity_counter = pdd->sdma_past_activity_counter;

	/*
	 * Get the current activity counters for all active SDMA queues
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
		    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
			val = 0;
			ret = read_sdma_queue_counter(q, &val);
			if (ret)
				pr_debug("Failed to read SDMA queue active counter for queue id: %d",
					 q->properties.queue_id);
			else
				workarea->sdma_activity_counter += val;
		}
	}

	dqm_unlock(dqm);
	unuse_mm(mm);
	mmput(mm);
}

static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
			       char *buffer)
{
	if (strcmp(attr->name, "pasid") == 0) {
		struct kfd_process *p = container_of(attr, struct kfd_process,
						     attr_pasid);

		return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
	} else if (strncmp(attr->name, "vram_", 5) == 0) {
		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
							      attr_vram);
		return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
	} else if (strncmp(attr->name, "sdma_", 5) == 0) {
		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
							      attr_sdma);
		struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;

		INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
			  kfd_sdma_activity_worker);

		sdma_activity_work_handler.pdd = pdd;

		schedule_work(&sdma_activity_work_handler.sdma_activity_work);

		flush_work(&sdma_activity_work_handler.sdma_activity_work);

		return snprintf(buffer, PAGE_SIZE, "%llu\n",
				(sdma_activity_work_handler.sdma_activity_counter) /
				SDMA_ACTIVITY_DIVISOR);
	} else {
		pr_err("Invalid attribute");
		return -EINVAL;
	}

	return 0;
}

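/*
 * kobject machinery backing the per-process sysfs tree. procfs_type is
 * used for both the top-level "proc" folder (created under the KFD
 * device kobject by kfd_procfs_init()) and each per-process <pid>
 * folder. kfd_procfs_show() above serves the pasid, vram_<gpuid> and
 * sdma_<gpuid> attributes created in those folders; reading sdma_<gpuid>
 * runs kfd_sdma_activity_worker() synchronously to sample the counters.
 */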
static void kfd_procfs_kobj_release(struct kobject *kobj)
{
	kfree(kobj);
}

static const struct sysfs_ops kfd_procfs_ops = {
	.show = kfd_procfs_show,
};

static struct kobj_type procfs_type = {
	.release = kfd_procfs_kobj_release,
	.sysfs_ops = &kfd_procfs_ops,
};

void kfd_procfs_init(void)
{
	int ret = 0;

	procfs.kobj = kfd_alloc_struct(procfs.kobj);
	if (!procfs.kobj)
		return;

	ret = kobject_init_and_add(procfs.kobj, &procfs_type,
				   &kfd_device->kobj, "proc");
	if (ret) {
		pr_warn("Could not create procfs proc folder");
		/* If we fail to create the procfs, clean up */
		kfd_procfs_shutdown();
	}
}

void kfd_procfs_shutdown(void)
{
	if (procfs.kobj) {
		kobject_del(procfs.kobj);
		kobject_put(procfs.kobj);
		procfs.kobj = NULL;
	}
}

static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
				     struct attribute *attr, char *buffer)
{
	struct queue *q = container_of(kobj, struct queue, kobj);

	if (!strcmp(attr->name, "size"))
		return snprintf(buffer, PAGE_SIZE, "%llu",
				q->properties.queue_size);
	else if (!strcmp(attr->name, "type"))
		return snprintf(buffer, PAGE_SIZE, "%d", q->properties.type);
	else if (!strcmp(attr->name, "gpuid"))
		return snprintf(buffer, PAGE_SIZE, "%u", q->device->id);
	else
		pr_err("Invalid attribute");

	return 0;
}

static struct attribute attr_queue_size = {
	.name = "size",
	.mode = KFD_SYSFS_FILE_MODE
};

static struct attribute attr_queue_type = {
	.name = "type",
	.mode = KFD_SYSFS_FILE_MODE
};

static struct attribute attr_queue_gpuid = {
	.name = "gpuid",
	.mode = KFD_SYSFS_FILE_MODE
};

static struct attribute *procfs_queue_attrs[] = {
	&attr_queue_size,
	&attr_queue_type,
	&attr_queue_gpuid,
	NULL
};

static const struct sysfs_ops procfs_queue_ops = {
	.show = kfd_procfs_queue_show,
};

static struct kobj_type procfs_queue_type = {
	.sysfs_ops = &procfs_queue_ops,
	.default_attrs = procfs_queue_attrs,
};

int kfd_procfs_add_queue(struct queue *q)
{
	struct kfd_process *proc;
	int ret;

	if (!q || !q->process)
		return -EINVAL;
	proc = q->process;

	/* Create proc/<pid>/queues/<queue id> folder */
	if (!proc->kobj_queues)
		return -EFAULT;
	ret = kobject_init_and_add(&q->kobj, &procfs_queue_type,
				   proc->kobj_queues, "%u", q->properties.queue_id);
	if (ret < 0) {
		pr_warn("Creating proc/<pid>/queues/%u failed",
			q->properties.queue_id);
		kobject_put(&q->kobj);
		return ret;
	}

	return 0;
}

static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
				 char *name)
{
	int ret = 0;

	if (!p || !attr || !name)
		return -EINVAL;

	attr->name = name;
	attr->mode = KFD_SYSFS_FILE_MODE;
	sysfs_attr_init(attr);

	ret = sysfs_create_file(p->kobj, attr);

	return ret;
}

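/*
 * Create the per-GPU usage files for a process. Note that
 * kfd_sysfs_create_file() stores the @name pointer in the attribute
 * rather than copying it, so the names are built in the
 * pdd->vram_filename / pdd->sdma_filename buffers, which live as long
 * as the process device data itself.
 */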
int kfd_procfs_add_sysfs_files(struct kfd_process *p)
{
	int ret = 0;
	struct kfd_process_device *pdd;

	if (!p)
		return -EINVAL;

	if (!p->kobj)
		return -EFAULT;

	/*
	 * Create sysfs files for each GPU:
	 * - proc/<pid>/vram_<gpuid>
	 * - proc/<pid>/sdma_<gpuid>
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
			 pdd->dev->id);
		ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename);
		if (ret)
			pr_warn("Creating vram usage for gpu id %d failed",
				(int)pdd->dev->id);

		snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
			 pdd->dev->id);
		ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename);
		if (ret)
			pr_warn("Creating sdma usage for gpu id %d failed",
				(int)pdd->dev->id);
	}

	return ret;
}


void kfd_procfs_del_queue(struct queue *q)
{
	if (!q)
		return;

	kobject_del(&q->kobj);
	kobject_put(&q->kobj);
}

int kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
	if (!kfd_restore_wq)
		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);

	if (!kfd_process_wq || !kfd_restore_wq) {
		kfd_process_destroy_wq();
		return -ENOMEM;
	}

	return 0;
}

void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
	if (kfd_restore_wq) {
		destroy_workqueue(kfd_restore_wq);
		kfd_restore_wq = NULL;
	}
}

static void kfd_process_free_gpuvm(struct kgd_mem *mem,
				   struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL);
}

/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
 * This function should be only called right after the process
 * is created and when kfd_processes_mutex is still being held
 * to avoid concurrency. Because of that exclusiveness, we do
 * not need to take p->mutex.
 */
static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
				   uint64_t gpu_va, uint32_t size,
				   uint32_t flags, void **kptr)
{
	struct kfd_dev *kdev = pdd->dev;
	struct kgd_mem *mem = NULL;
	int handle;
	int err;

	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
						      pdd->vm, &mem, NULL, flags);
	if (err)
		goto err_alloc_mem;

	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
	if (err)
		goto err_map_mem;

	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Create an obj handle so kfd_process_device_remove_obj_handle
	 * will take care of the bo removal when the process finishes.
	 * We do not need to take p->mutex, because the process is just
	 * created and the ioctls have not had the chance to run.
	 */
	handle = kfd_process_device_create_obj_handle(pdd, mem);

	if (handle < 0) {
		err = handle;
		goto free_gpuvm;
	}

	if (kptr) {
		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
				(struct kgd_mem *)mem, kptr, NULL);
		if (err) {
			pr_debug("Map GTT BO to kernel failed\n");
			goto free_obj_handle;
		}
	}

	return err;

free_obj_handle:
	kfd_process_device_remove_obj_handle(pdd, handle);
free_gpuvm:
sync_memory_failed:
	kfd_process_free_gpuvm(mem, pdd);
	return err;

err_map_mem:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL);
err_alloc_mem:
	*kptr = NULL;
	return err;
}

/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
 * process for IB usage. The memory reserved is for KFD to submit
 * IB to AMDGPU from kernel. If the memory is reserved
 * successfully, ib_kaddr will have the CPU/kernel
 * address. Check ib_kaddr before accessing the memory.
 */
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT |
			 KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
			 KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
			 KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (qpd->ib_kaddr || !qpd->ib_base)
		return 0;

	/* ib_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
				      &kaddr);
	if (ret)
		return ret;

	qpd->ib_kaddr = kaddr;

	return 0;
}

struct kfd_process *kfd_create_process(struct file *filep)
{
	struct kfd_process *process;
	struct task_struct *thread = current;
	int ret;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/*
	 * take kfd processes mutex before starting of process creation
	 * so there won't be a case where two threads of the same process
	 * create two kfd_process structures
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process) {
		pr_debug("Process already found\n");
	} else {
		process = create_process(thread);
		if (IS_ERR(process))
			goto out;

		ret = kfd_process_init_cwsr_apu(process, filep);
		if (ret) {
			process = ERR_PTR(ret);
			goto out;
		}

		if (!procfs.kobj)
			goto out;

		process->kobj = kfd_alloc_struct(process->kobj);
		if (!process->kobj) {
			pr_warn("Creating procfs kobject failed");
			goto out;
		}
		ret = kobject_init_and_add(process->kobj, &procfs_type,
					   procfs.kobj, "%d",
					   (int)process->lead_thread->pid);
		if (ret) {
			pr_warn("Creating procfs pid directory failed");
			goto out;
		}

		process->attr_pasid.name = "pasid";
		process->attr_pasid.mode = KFD_SYSFS_FILE_MODE;
		sysfs_attr_init(&process->attr_pasid);
		ret = sysfs_create_file(process->kobj, &process->attr_pasid);
		if (ret)
			pr_warn("Creating pasid for pid %d failed",
				(int)process->lead_thread->pid);

		process->kobj_queues = kobject_create_and_add("queues",
							      process->kobj);
		if (!process->kobj_queues)
			pr_warn("Creating KFD proc/queues folder failed");

		ret = kfd_procfs_add_sysfs_files(process);
		if (ret)
			pr_warn("Creating sysfs usage file for pid %d failed",
				(int)process->lead_thread->pid);
	}
out:
	if (!IS_ERR(process))
		kref_get(&process->ref);
	mutex_unlock(&kfd_processes_mutex);

	return process;
}

struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);
	if (!process)
		return ERR_PTR(-EINVAL);

	return process;
}

static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
				   kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

void kfd_unref_process(struct kfd_process *p)
{
	kref_put(&p->ref, kfd_process_ref_release);
}

static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
	struct kfd_process *p = pdd->process;
	void *mem;
	int id;

	/*
	 * Remove all handles from idr and release appropriate
	 * local memory object
	 */
	idr_for_each_entry(&pdd->alloc_idr, mem, id) {
		struct kfd_process_device *peer_pdd;

		list_for_each_entry(peer_pdd, &p->per_device_data,
				    per_device_list) {
			if (!peer_pdd->vm)
				continue;
			amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
				peer_pdd->dev->kgd, mem, peer_pdd->vm);
		}

		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL);
		kfd_process_device_remove_obj_handle(pdd, id);
	}
}

static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		kfd_process_device_free_bos(pdd);
}

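/*
 * Tear down all per-device data of a process. Callers free outstanding
 * BOs first (see kfd_process_free_outstanding_kfd_bos()), since freeing
 * them still needs the per-device VMs that are released or destroyed
 * here. This also drops the runtime-PM reference taken in
 * kfd_bind_process_to_device().
 */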
static void kfd_process_destroy_pdds(struct kfd_process *p)
{
	struct kfd_process_device *pdd, *temp;

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
				 per_device_list) {
		pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
			 pdd->dev->id, p->pasid);

		if (pdd->drm_file) {
			amdgpu_amdkfd_gpuvm_release_process_vm(
					pdd->dev->kgd, pdd->vm);
			fput(pdd->drm_file);
		} else if (pdd->vm)
			amdgpu_amdkfd_gpuvm_destroy_process_vm(
					pdd->dev->kgd, pdd->vm);

		list_del(&pdd->per_device_list);

		if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
				   get_order(KFD_CWSR_TBA_TMA_SIZE));

		kfree(pdd->qpd.doorbell_bitmap);
		idr_destroy(&pdd->alloc_idr);

		/*
		 * before destroying pdd, make sure to report availability
		 * for auto suspend
		 */
		if (pdd->runtime_inuse) {
			pm_runtime_mark_last_busy(pdd->dev->ddev->dev);
			pm_runtime_put_autosuspend(pdd->dev->ddev->dev);
			pdd->runtime_inuse = false;
		}

		kfree(pdd);
	}
}

/* No process locking is needed in this function, because the process
 * is not findable any more. We must assume that no other thread is
 * using it any more, otherwise we couldn't safely free the process
 * structure in the end.
 */
static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process *p = container_of(work, struct kfd_process,
					     release_work);
	struct kfd_process_device *pdd;

	/* Remove the procfs files */
	if (p->kobj) {
		sysfs_remove_file(p->kobj, &p->attr_pasid);
		kobject_del(p->kobj_queues);
		kobject_put(p->kobj_queues);
		p->kobj_queues = NULL;

		list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
			sysfs_remove_file(p->kobj, &pdd->attr_vram);
			sysfs_remove_file(p->kobj, &pdd->attr_sdma);
		}

		kobject_del(p->kobj);
		kobject_put(p->kobj);
		p->kobj = NULL;
	}

	kfd_iommu_unbind_process(p);

	kfd_process_free_outstanding_kfd_bos(p);

	kfd_process_destroy_pdds(p);
	dma_fence_put(p->ef);

	kfd_event_free_process(p);

	kfd_pasid_free(p->pasid);
	kfd_free_process_doorbells(p);

	mutex_destroy(&p->mutex);

	put_task_struct(p->lead_thread);

	kfree(p);
}

static void kfd_process_ref_release(struct kref *ref)
{
	struct kfd_process *p = container_of(ref, struct kfd_process, ref);

	INIT_WORK(&p->release_work, kfd_process_wq_release);
	queue_work(kfd_process_wq, &p->release_work);
}

static void kfd_process_free_notifier(struct mmu_notifier *mn)
{
	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
}

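/*
 * Called via the mmu_notifier when the process address space is torn
 * down. Removes the process from the lookup table so no new references
 * can be taken, stops eviction/restore work, detaches the debugger,
 * destroys all user queues and signals the eviction fence before
 * dropping the notifier's reference on the process.
 */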
static void kfd_process_notifier_release(struct mmu_notifier *mn,
					 struct mm_struct *mm)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;

	/*
	 * The kfd_process structure cannot be freed because the
	 * mmu_notifier srcu is read locked
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	if (WARN_ON(p->mm != mm))
		return;

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	cancel_delayed_work_sync(&p->eviction_work);
	cancel_delayed_work_sync(&p->restore_work);

	mutex_lock(&p->mutex);

	/* Iterate over all process device data structures and if the
	 * pdd is in debug mode, we should first force unregistration,
	 * then we will be able to destroy the queues
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;

		mutex_lock(kfd_get_dbgmgr_mutex());
		if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
			if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
				kfd_dbgmgr_destroy(dev->dbgmgr);
				dev->dbgmgr = NULL;
			}
		}
		mutex_unlock(kfd_get_dbgmgr_mutex());
	}

	kfd_process_dequeue_from_all_devices(p);
	pqm_uninit(&p->pqm);

	/* Indicate to other users that MM is no longer valid */
	p->mm = NULL;
	/* Signal the eviction fence after user mode queues are
	 * destroyed. This allows any BOs to be freed without
	 * triggering pointless evictions or waiting for fences.
	 */
	dma_fence_signal(p->ef);

	mutex_unlock(&p->mutex);

	mmu_notifier_put(&p->mmu_notifier);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
	.free_notifier = kfd_process_free_notifier,
};

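/*
 * CWSR (compute wave save/restore) trampoline setup. On APUs the
 * TBA/TMA region is mmapped from the KFD device file into the user
 * address space and the CWSR ISA is copied into it; on dGPUs (see
 * kfd_process_device_init_cwsr_dgpu() below) the region is allocated
 * in GTT at the reserved qpd->cwsr_base address instead.
 */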
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
	unsigned long offset;
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;
		struct qcm_process_device *qpd = &pdd->qpd;

		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
			continue;

		offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
			MAP_SHARED, offset);

		if (IS_ERR_VALUE(qpd->tba_addr)) {
			int err = qpd->tba_addr;

			pr_err("Failure to set tba address. error %d.\n", err);
			qpd->tba_addr = 0;
			qpd->cwsr_kaddr = NULL;
			return err;
		}

		memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
		pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
			 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
	}

	return 0;
}

static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
			| KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
			| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
		return 0;

	/* cwsr_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
				      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
	if (ret)
		return ret;

	qpd->cwsr_kaddr = kaddr;
	qpd->tba_addr = qpd->cwsr_base;

	memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
	pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
		 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);

	return 0;
}

/*
 * On return the kfd_process is fully operational and will be freed when the
 * mm is released
 */
static struct kfd_process *create_process(const struct task_struct *thread)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);
	if (!process)
		goto err_alloc_process;

	kref_init(&process->ref);
	mutex_init(&process->mutex);
	process->mm = thread->mm;
	process->lead_thread = thread->group_leader;
	INIT_LIST_HEAD(&process->per_device_data);
	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
	process->last_restore_timestamp = get_jiffies_64();
	kfd_event_init_process(process);
	process->is_32bit_user_mode = in_compat_syscall();

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	if (kfd_alloc_process_doorbells(process) < 0)
		goto err_alloc_doorbells;

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	/* init process apertures*/
	err = kfd_init_apertures(process);
	if (err != 0)
		goto err_init_apertures;

	/* Must be last, have to use release destruction after this */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_register_notifier;

	get_task_struct(process->lead_thread);
	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
		     (uintptr_t)process->mm);

	return process;

err_register_notifier:
	kfd_process_free_outstanding_kfd_bos(process);
	kfd_process_destroy_pdds(process);
err_init_apertures:
	pqm_uninit(&process->pqm);
err_process_pqm_init:
	kfd_free_process_doorbells(process);
err_alloc_doorbells:
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	mutex_destroy(&process->mutex);
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}

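/*
 * On SOC15 ASICs a range of doorbells is reserved for non-CP engines
 * (SDMA, IH, VCN). Bits set in qpd->doorbell_bitmap mark doorbell
 * indices that must not be handed out to user CP queues; both the
 * reserved range and its mirrored counterpart at
 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET are marked.
 */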
static int init_doorbell_bitmap(struct qcm_process_device *qpd,
				struct kfd_dev *dev)
{
	unsigned int i;
	int range_start = dev->shared_resources.non_cp_doorbells_start;
	int range_end = dev->shared_resources.non_cp_doorbells_end;

	if (!KFD_IS_SOC15(dev->device_info->asic_family))
		return 0;

	qpd->doorbell_bitmap =
		kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
				     BITS_PER_BYTE), GFP_KERNEL);
	if (!qpd->doorbell_bitmap)
		return -ENOMEM;

	/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
		 range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
		 range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);

	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
		if (i >= range_start && i <= range_end) {
			set_bit(i, qpd->doorbell_bitmap);
			set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
				qpd->doorbell_bitmap);
		}
	}

	return 0;
}

struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	return NULL;
}

struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							   struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (!pdd)
		return NULL;

	if (init_doorbell_bitmap(&pdd->qpd, dev)) {
		pr_err("Failed to init doorbell for process\n");
		kfree(pdd);
		return NULL;
	}

	pdd->dev = dev;
	INIT_LIST_HEAD(&pdd->qpd.queues_list);
	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
	pdd->qpd.dqm = dev->dqm;
	pdd->qpd.pqm = &p->pqm;
	pdd->qpd.evicted = 0;
	pdd->qpd.mapped_gws_queue = false;
	pdd->process = p;
	pdd->bound = PDD_UNBOUND;
	pdd->already_dequeued = false;
	pdd->runtime_inuse = false;
	pdd->vram_usage = 0;
	pdd->sdma_past_activity_counter = 0;
	list_add(&pdd->per_device_list, &p->per_device_data);

	/* Init idr used for memory handle translation */
	idr_init(&pdd->alloc_idr);

	return pdd;
}

/**
 * kfd_process_device_init_vm - Initialize a VM for a process-device
 *
 * @pdd:      The process-device
 * @drm_file: Optional pointer to a DRM file descriptor
 *
 * If @drm_file is specified, it will be used to acquire the VM from
 * that file descriptor. If successful, the @pdd takes ownership of
 * the file descriptor.
 *
 * If @drm_file is NULL, a new VM is created.
 *
 * Returns 0 on success, -errno on failure.
 */
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
			       struct file *drm_file)
{
	struct kfd_process *p;
	struct kfd_dev *dev;
	int ret;

	if (pdd->vm)
		return drm_file ? -EBUSY : 0;

	p = pdd->process;
	dev = pdd->dev;

	if (drm_file)
		ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
			dev->kgd, drm_file, p->pasid,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	else
		ret = amdgpu_amdkfd_gpuvm_create_process_vm(dev->kgd, p->pasid,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	if (ret) {
		pr_err("Failed to create process VM object\n");
		return ret;
	}

	amdgpu_vm_set_task_info(pdd->vm);

	ret = kfd_process_device_reserve_ib_mem(pdd);
	if (ret)
		goto err_reserve_ib_mem;
	ret = kfd_process_device_init_cwsr_dgpu(pdd);
	if (ret)
		goto err_init_cwsr;

	pdd->drm_file = drm_file;

	return 0;

err_init_cwsr:
err_reserve_ib_mem:
	kfd_process_device_free_bos(pdd);
	if (!drm_file)
		amdgpu_amdkfd_gpuvm_destroy_process_vm(dev->kgd, pdd->vm);
	pdd->vm = NULL;

	return ret;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
						      struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int err;

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

	/*
	 * signal runtime-pm system to auto resume and prevent
	 * further runtime suspend once device pdd is created until
	 * pdd is destroyed.
	 */
	if (!pdd->runtime_inuse) {
		err = pm_runtime_get_sync(dev->ddev->dev);
		if (err < 0)
			return ERR_PTR(err);
	}

	err = kfd_iommu_bind_process_to_device(pdd);
	if (err)
		goto out;

	err = kfd_process_device_init_vm(pdd, NULL);
	if (err)
		goto out;

	/*
	 * make sure that runtime_usage counter is incremented just once
	 * per pdd
	 */
	pdd->runtime_inuse = true;

	return pdd;

out:
	/* balance runpm reference count and exit with error */
	if (!pdd->runtime_inuse) {
		pm_runtime_mark_last_busy(dev->ddev->dev);
		pm_runtime_put_autosuspend(dev->ddev->dev);
	}

	return ERR_PTR(err);
}

struct kfd_process_device *kfd_get_first_process_device_data(
						struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;
	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !(list_empty(&p->per_device_data));
}

/* Create specific handle mapped to mem from process local memory idr
 * Assumes that the process lock is held.
 */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
					 void *mem)
{
	return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
}

/* Translate specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
					  int handle)
{
	if (handle < 0)
		return NULL;

	return idr_find(&pdd->alloc_idr, handle);
}

/* Remove specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
					  int handle)
{
	if (handle >= 0)
		idr_remove(&pdd->alloc_idr, handle);
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
	struct kfd_process *p, *ret_p = NULL;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (p->pasid == pasid) {
			kref_get(&p->ref);
			ret_p = p;
			break;
		}
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return ret_p;
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *p;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	p = find_process_by_mm(mm);
	if (p)
		kref_get(&p->ref);

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

/* kfd_process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
int kfd_process_evict_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r = 0;
	unsigned int n_evicted = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
							    &pdd->qpd);
		if (r) {
			pr_err("Failed to evict process queues\n");
			goto fail;
		}
		n_evicted++;
	}

	return r;

fail:
	/* To keep state consistent, roll back partial eviction by
	 * restoring queues
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		if (n_evicted == 0)
			break;
		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd))
			pr_err("Failed to restore queues\n");

		n_evicted--;
	}

	return r;
}

/* kfd_process_restore_queues - Restore all user queues of a process */
int kfd_process_restore_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r, ret = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd);
		if (r) {
			pr_err("Failed to restore process queues\n");
			if (!ret)
				ret = r;
		}
	}

	return ret;
}

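/*
 * Deferred eviction of a process' user queues. The work is typically
 * queued when one of the process' BOs is evicted and its eviction
 * fence is enabled; p->last_eviction_seqno then matches p->ef->seqno.
 * On success the old eviction fence is signalled and dropped, and
 * restore_process_worker() is queued on the ordered kfd_restore_wq
 * after PROCESS_RESTORE_TIME_MS.
 */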
static void evict_process_worker(struct work_struct *work)
{
	int ret;
	struct kfd_process *p;
	struct delayed_work *dwork;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid
	 */
	p = container_of(dwork, struct kfd_process, eviction_work);
	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
		  "Eviction fence mismatch\n");

	/* Narrow window of overlap between restore and evict work
	 * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
	 * unreserves KFD BOs, it is possible to be evicted again. But
	 * restore has a few more steps to finish. So let's wait for any
	 * previous restore work to complete.
	 */
	flush_delayed_work(&p->restore_work);

	pr_debug("Started evicting pasid 0x%x\n", p->pasid);
	ret = kfd_process_evict_queues(p);
	if (!ret) {
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
		queue_delayed_work(kfd_restore_wq, &p->restore_work,
				   msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));

		pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
	} else {
		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
	}
}

static void restore_process_worker(struct work_struct *work)
{
	struct delayed_work *dwork;
	struct kfd_process *p;
	int ret = 0;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid
	 */
	p = container_of(dwork, struct kfd_process, restore_work);
	pr_debug("Started restoring pasid 0x%x\n", p->pasid);

	/* Setting last_restore_timestamp before successful restoration.
	 * Otherwise this would have to be set by KGD (restore_process_bos)
	 * before KFD BOs are unreserved. If not, the process can be evicted
	 * again before the timestamp is set.
	 * If restore fails, the timestamp will be set again in the next
	 * attempt. This would mean that the minimum GPU quanta would be
	 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
	 * functions)
	 */

	p->last_restore_timestamp = get_jiffies_64();
	ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
						      &p->ef);
	if (ret) {
		pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
		WARN(!ret, "reschedule restore work failed\n");
		return;
	}

	ret = kfd_process_restore_queues(p);
	if (!ret)
		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
	else
		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
}

void kfd_suspend_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		cancel_delayed_work_sync(&p->eviction_work);
		cancel_delayed_work_sync(&p->restore_work);

		if (kfd_process_evict_queues(p))
			pr_err("Failed to suspend process 0x%x\n", p->pasid);
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
}

int kfd_resume_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
			pr_err("Restore process %d failed during resume\n",
			       p->pasid);
			ret = -EFAULT;
		}
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
	return ret;
}

int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
			  struct vm_area_struct *vma)
{
	struct kfd_process_device *pdd;
	struct qcm_process_device *qpd;

	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
		pr_err("Incorrect CWSR mapping size.\n");
		return -EINVAL;
	}

	pdd = kfd_get_process_device_data(dev, process);
	if (!pdd)
		return -EINVAL;
	qpd = &pdd->qpd;

	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
						   get_order(KFD_CWSR_TBA_TMA_SIZE));
	if (!qpd->cwsr_kaddr) {
		pr_err("Error allocating per process CWSR buffer.\n");
		return -ENOMEM;
	}

	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
	/* Mapping pages to user process */
	return remap_pfn_range(vma, vma->vm_start,
			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}

void kfd_flush_tlb(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		/* Nothing to flush until a VMID is assigned, which
		 * only happens when the first queue is created.
		 */
		if (pdd->qpd.vmid)
			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
							 pdd->qpd.vmid);
	} else {
		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
						  pdd->process->pasid);
	}
}

#if defined(CONFIG_DEBUG_FS)

int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
{
	struct kfd_process *p;
	unsigned int temp;
	int r = 0;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		seq_printf(m, "Process %d PASID 0x%x:\n",
			   p->lead_thread->tgid, p->pasid);

		mutex_lock(&p->mutex);
		r = pqm_debugfs_mqds(m, &p->pqm);
		mutex_unlock(&p->mutex);

		if (r)
			break;
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return r;
}

#endif