1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #include <linux/device.h> 24 #include <linux/export.h> 25 #include <linux/err.h> 26 #include <linux/fs.h> 27 #include <linux/file.h> 28 #include <linux/sched.h> 29 #include <linux/slab.h> 30 #include <linux/uaccess.h> 31 #include <linux/compat.h> 32 #include <uapi/linux/kfd_ioctl.h> 33 #include <linux/time.h> 34 #include <linux/mm.h> 35 #include <linux/mman.h> 36 #include <linux/dma-buf.h> 37 #include <asm/processor.h> 38 #include "kfd_priv.h" 39 #include "kfd_device_queue_manager.h" 40 #include "kfd_dbgmgr.h" 41 #include "amdgpu_amdkfd.h" 42 43 static long kfd_ioctl(struct file *, unsigned int, unsigned long); 44 static int kfd_open(struct inode *, struct file *); 45 static int kfd_release(struct inode *, struct file *); 46 static int kfd_mmap(struct file *, struct vm_area_struct *); 47 48 static const char kfd_dev_name[] = "kfd"; 49 50 static const struct file_operations kfd_fops = { 51 .owner = THIS_MODULE, 52 .unlocked_ioctl = kfd_ioctl, 53 .compat_ioctl = compat_ptr_ioctl, 54 .open = kfd_open, 55 .release = kfd_release, 56 .mmap = kfd_mmap, 57 }; 58 59 static int kfd_char_dev_major = -1; 60 static struct class *kfd_class; 61 struct device *kfd_device; 62 63 int kfd_chardev_init(void) 64 { 65 int err = 0; 66 67 kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops); 68 err = kfd_char_dev_major; 69 if (err < 0) 70 goto err_register_chrdev; 71 72 kfd_class = class_create(THIS_MODULE, kfd_dev_name); 73 err = PTR_ERR(kfd_class); 74 if (IS_ERR(kfd_class)) 75 goto err_class_create; 76 77 kfd_device = device_create(kfd_class, NULL, 78 MKDEV(kfd_char_dev_major, 0), 79 NULL, kfd_dev_name); 80 err = PTR_ERR(kfd_device); 81 if (IS_ERR(kfd_device)) 82 goto err_device_create; 83 84 return 0; 85 86 err_device_create: 87 class_destroy(kfd_class); 88 err_class_create: 89 unregister_chrdev(kfd_char_dev_major, kfd_dev_name); 90 err_register_chrdev: 91 return err; 92 } 93 94 void kfd_chardev_exit(void) 95 { 96 device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0)); 97 class_destroy(kfd_class); 98 unregister_chrdev(kfd_char_dev_major, kfd_dev_name); 99 } 100 101 struct device *kfd_chardev(void) 102 { 103 return kfd_device; 104 } 105 106 107 static int kfd_open(struct inode *inode, struct file *filep) 108 { 109 struct kfd_process *process; 110 bool is_32bit_user_mode; 111 112 if (iminor(inode) != 0) 113 return -ENODEV; 114 115 is_32bit_user_mode 
= in_compat_syscall(); 116 117 if (is_32bit_user_mode) { 118 dev_warn(kfd_device, 119 "Process %d (32-bit) failed to open /dev/kfd\n" 120 "32-bit processes are not supported by amdkfd\n", 121 current->pid); 122 return -EPERM; 123 } 124 125 process = kfd_create_process(filep); 126 if (IS_ERR(process)) 127 return PTR_ERR(process); 128 129 if (kfd_is_locked()) { 130 kfd_unref_process(process); 131 return -EAGAIN; 132 } 133 134 /* filep now owns the reference returned by kfd_create_process */ 135 filep->private_data = process; 136 137 dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n", 138 process->pasid, process->is_32bit_user_mode); 139 140 return 0; 141 } 142 143 static int kfd_release(struct inode *inode, struct file *filep) 144 { 145 struct kfd_process *process = filep->private_data; 146 147 if (process) 148 kfd_unref_process(process); 149 150 return 0; 151 } 152 153 static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p, 154 void *data) 155 { 156 struct kfd_ioctl_get_version_args *args = data; 157 158 args->major_version = KFD_IOCTL_MAJOR_VERSION; 159 args->minor_version = KFD_IOCTL_MINOR_VERSION; 160 161 return 0; 162 } 163 164 static int set_queue_properties_from_user(struct queue_properties *q_properties, 165 struct kfd_ioctl_create_queue_args *args) 166 { 167 if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { 168 pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); 169 return -EINVAL; 170 } 171 172 if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { 173 pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); 174 return -EINVAL; 175 } 176 177 if ((args->ring_base_address) && 178 (!access_ok((const void __user *) args->ring_base_address, 179 sizeof(uint64_t)))) { 180 pr_err("Can't access ring base address\n"); 181 return -EFAULT; 182 } 183 184 if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { 185 pr_err("Ring size must be a power of 2 or 0\n"); 186 return -EINVAL; 187 } 188 189 if (!access_ok((const void __user *) args->read_pointer_address, 190 sizeof(uint32_t))) { 191 pr_err("Can't access read pointer\n"); 192 return -EFAULT; 193 } 194 195 if (!access_ok((const void __user *) args->write_pointer_address, 196 sizeof(uint32_t))) { 197 pr_err("Can't access write pointer\n"); 198 return -EFAULT; 199 } 200 201 if (args->eop_buffer_address && 202 !access_ok((const void __user *) args->eop_buffer_address, 203 sizeof(uint32_t))) { 204 pr_debug("Can't access eop buffer"); 205 return -EFAULT; 206 } 207 208 if (args->ctx_save_restore_address && 209 !access_ok((const void __user *) args->ctx_save_restore_address, 210 sizeof(uint32_t))) { 211 pr_debug("Can't access ctx save restore buffer"); 212 return -EFAULT; 213 } 214 215 q_properties->is_interop = false; 216 q_properties->queue_percent = args->queue_percentage; 217 q_properties->priority = args->queue_priority; 218 q_properties->queue_address = args->ring_base_address; 219 q_properties->queue_size = args->ring_size; 220 q_properties->read_ptr = (uint32_t *) args->read_pointer_address; 221 q_properties->write_ptr = (uint32_t *) args->write_pointer_address; 222 q_properties->eop_ring_buffer_address = args->eop_buffer_address; 223 q_properties->eop_ring_buffer_size = args->eop_buffer_size; 224 q_properties->ctx_save_restore_area_address = 225 args->ctx_save_restore_address; 226 q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size; 227 q_properties->ctl_stack_size = args->ctl_stack_size; 228 if (args->queue_type == 
KFD_IOC_QUEUE_TYPE_COMPUTE || 229 args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) 230 q_properties->type = KFD_QUEUE_TYPE_COMPUTE; 231 else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA) 232 q_properties->type = KFD_QUEUE_TYPE_SDMA; 233 else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI) 234 q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI; 235 else 236 return -ENOTSUPP; 237 238 if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) 239 q_properties->format = KFD_QUEUE_FORMAT_AQL; 240 else 241 q_properties->format = KFD_QUEUE_FORMAT_PM4; 242 243 pr_debug("Queue Percentage: %d, %d\n", 244 q_properties->queue_percent, args->queue_percentage); 245 246 pr_debug("Queue Priority: %d, %d\n", 247 q_properties->priority, args->queue_priority); 248 249 pr_debug("Queue Address: 0x%llX, 0x%llX\n", 250 q_properties->queue_address, args->ring_base_address); 251 252 pr_debug("Queue Size: 0x%llX, %u\n", 253 q_properties->queue_size, args->ring_size); 254 255 pr_debug("Queue r/w Pointers: %px, %px\n", 256 q_properties->read_ptr, 257 q_properties->write_ptr); 258 259 pr_debug("Queue Format: %d\n", q_properties->format); 260 261 pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address); 262 263 pr_debug("Queue CTX save area: 0x%llX\n", 264 q_properties->ctx_save_restore_area_address); 265 266 return 0; 267 } 268 269 static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, 270 void *data) 271 { 272 struct kfd_ioctl_create_queue_args *args = data; 273 struct kfd_dev *dev; 274 int err = 0; 275 unsigned int queue_id; 276 struct kfd_process_device *pdd; 277 struct queue_properties q_properties; 278 uint32_t doorbell_offset_in_process = 0; 279 280 memset(&q_properties, 0, sizeof(struct queue_properties)); 281 282 pr_debug("Creating queue ioctl\n"); 283 284 err = set_queue_properties_from_user(&q_properties, args); 285 if (err) 286 return err; 287 288 pr_debug("Looking for gpu id 0x%x\n", args->gpu_id); 289 dev = kfd_device_by_id(args->gpu_id); 290 if (!dev) { 291 pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); 292 return -EINVAL; 293 } 294 295 mutex_lock(&p->mutex); 296 297 pdd = kfd_bind_process_to_device(dev, p); 298 if (IS_ERR(pdd)) { 299 err = -ESRCH; 300 goto err_bind_process; 301 } 302 303 pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n", 304 p->pasid, 305 dev->id); 306 307 err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, 308 &doorbell_offset_in_process); 309 if (err != 0) 310 goto err_create_queue; 311 312 args->queue_id = queue_id; 313 314 315 /* Return gpu_id as doorbell offset for mmap usage */ 316 args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL; 317 args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id); 318 if (KFD_IS_SOC15(dev->device_info->asic_family)) 319 /* On SOC15 ASICs, include the doorbell offset within the 320 * process doorbell frame, which is 2 pages. 
321 */ 322 args->doorbell_offset |= doorbell_offset_in_process; 323 324 mutex_unlock(&p->mutex); 325 326 pr_debug("Queue id %d was created successfully\n", args->queue_id); 327 328 pr_debug("Ring buffer address == 0x%016llX\n", 329 args->ring_base_address); 330 331 pr_debug("Read ptr address == 0x%016llX\n", 332 args->read_pointer_address); 333 334 pr_debug("Write ptr address == 0x%016llX\n", 335 args->write_pointer_address); 336 337 return 0; 338 339 err_create_queue: 340 err_bind_process: 341 mutex_unlock(&p->mutex); 342 return err; 343 } 344 345 static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, 346 void *data) 347 { 348 int retval; 349 struct kfd_ioctl_destroy_queue_args *args = data; 350 351 pr_debug("Destroying queue id %d for pasid 0x%x\n", 352 args->queue_id, 353 p->pasid); 354 355 mutex_lock(&p->mutex); 356 357 retval = pqm_destroy_queue(&p->pqm, args->queue_id); 358 359 mutex_unlock(&p->mutex); 360 return retval; 361 } 362 363 static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, 364 void *data) 365 { 366 int retval; 367 struct kfd_ioctl_update_queue_args *args = data; 368 struct queue_properties properties; 369 370 if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { 371 pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); 372 return -EINVAL; 373 } 374 375 if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { 376 pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); 377 return -EINVAL; 378 } 379 380 if ((args->ring_base_address) && 381 (!access_ok((const void __user *) args->ring_base_address, 382 sizeof(uint64_t)))) { 383 pr_err("Can't access ring base address\n"); 384 return -EFAULT; 385 } 386 387 if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { 388 pr_err("Ring size must be a power of 2 or 0\n"); 389 return -EINVAL; 390 } 391 392 properties.queue_address = args->ring_base_address; 393 properties.queue_size = args->ring_size; 394 properties.queue_percent = args->queue_percentage; 395 properties.priority = args->queue_priority; 396 397 pr_debug("Updating queue id %d for pasid 0x%x\n", 398 args->queue_id, p->pasid); 399 400 mutex_lock(&p->mutex); 401 402 retval = pqm_update_queue(&p->pqm, args->queue_id, &properties); 403 404 mutex_unlock(&p->mutex); 405 406 return retval; 407 } 408 409 static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p, 410 void *data) 411 { 412 int retval; 413 const int max_num_cus = 1024; 414 struct kfd_ioctl_set_cu_mask_args *args = data; 415 struct queue_properties properties; 416 uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr; 417 size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32); 418 419 if ((args->num_cu_mask % 32) != 0) { 420 pr_debug("num_cu_mask 0x%x must be a multiple of 32", 421 args->num_cu_mask); 422 return -EINVAL; 423 } 424 425 properties.cu_mask_count = args->num_cu_mask; 426 if (properties.cu_mask_count == 0) { 427 pr_debug("CU mask cannot be 0"); 428 return -EINVAL; 429 } 430 431 /* To prevent an unreasonably large CU mask size, set an arbitrary 432 * limit of max_num_cus bits. We can then just drop any CU mask bits 433 * past max_num_cus bits and just use the first max_num_cus bits. 
434 */ 435 if (properties.cu_mask_count > max_num_cus) { 436 pr_debug("CU mask cannot be greater than 1024 bits"); 437 properties.cu_mask_count = max_num_cus; 438 cu_mask_size = sizeof(uint32_t) * (max_num_cus/32); 439 } 440 441 properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL); 442 if (!properties.cu_mask) 443 return -ENOMEM; 444 445 retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size); 446 if (retval) { 447 pr_debug("Could not copy CU mask from userspace"); 448 kfree(properties.cu_mask); 449 return -EFAULT; 450 } 451 452 mutex_lock(&p->mutex); 453 454 retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties); 455 456 mutex_unlock(&p->mutex); 457 458 if (retval) 459 kfree(properties.cu_mask); 460 461 return retval; 462 } 463 464 static int kfd_ioctl_get_queue_wave_state(struct file *filep, 465 struct kfd_process *p, void *data) 466 { 467 struct kfd_ioctl_get_queue_wave_state_args *args = data; 468 int r; 469 470 mutex_lock(&p->mutex); 471 472 r = pqm_get_wave_state(&p->pqm, args->queue_id, 473 (void __user *)args->ctl_stack_address, 474 &args->ctl_stack_used_size, 475 &args->save_area_used_size); 476 477 mutex_unlock(&p->mutex); 478 479 return r; 480 } 481 482 static int kfd_ioctl_set_memory_policy(struct file *filep, 483 struct kfd_process *p, void *data) 484 { 485 struct kfd_ioctl_set_memory_policy_args *args = data; 486 struct kfd_dev *dev; 487 int err = 0; 488 struct kfd_process_device *pdd; 489 enum cache_policy default_policy, alternate_policy; 490 491 if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT 492 && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { 493 return -EINVAL; 494 } 495 496 if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT 497 && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { 498 return -EINVAL; 499 } 500 501 dev = kfd_device_by_id(args->gpu_id); 502 if (!dev) 503 return -EINVAL; 504 505 mutex_lock(&p->mutex); 506 507 pdd = kfd_bind_process_to_device(dev, p); 508 if (IS_ERR(pdd)) { 509 err = -ESRCH; 510 goto out; 511 } 512 513 default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT) 514 ? cache_policy_coherent : cache_policy_noncoherent; 515 516 alternate_policy = 517 (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT) 518 ? 
		cache_policy_coherent : cache_policy_noncoherent;

	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_set_trap_handler(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	if (dev->dqm->ops.set_trap_handler(dev->dqm,
					&pdd->qpd,
					args->tba_addr,
					args->tma_addr))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_register_args *args = data;
	struct kfd_dev *dev;
	struct kfd_dbgmgr *dbgmgr_ptr;
	struct kfd_process_device *pdd;
	bool create_ok;
	long status = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);
	mutex_lock(kfd_get_dbgmgr_mutex());

	/*
	 * make sure that we have pdd, if this is the first queue created for
	 * this process
	 */
	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		status = PTR_ERR(pdd);
		goto out;
	}

	if (!dev->dbgmgr) {
		/* In case of a legal call, we have no dbgmgr yet */
		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
		if (create_ok) {
			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
			if (status != 0)
				kfd_dbgmgr_destroy(dbgmgr_ptr);
			else
				dev->dbgmgr = dbgmgr_ptr;
		}
	} else {
		pr_debug("debugger already registered\n");
		status = -EINVAL;
	}

out:
	mutex_unlock(kfd_get_dbgmgr_mutex());
	mutex_unlock(&p->mutex);

	return status;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_unregister_args *args = data;
	struct kfd_dev *dev;
	long status;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev || !dev->dbgmgr)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
	if (!status) {
		kfd_dbgmgr_destroy(dev->dbgmgr);
		dev->dbgmgr = NULL;
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	return status;
}

/*
 * Parse and generate variable size data structure for address watch.
 * Total size of the buffer and # watch points is limited in order
 * to prevent kernel abuse. (This has no bearing on the much smaller HW
 * limitation, which is enforced by the dbgdev module.)
 * Please also note that the watch address itself is not "copied from
 * user", since it is set into the HW in user mode values.
 *
 */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_address_watch_args *args = data;
	struct kfd_dev *dev;
	struct dbg_address_watch_info aw_info;
	unsigned char *args_buff;
	long status;
	void __user *cmd_from_user;
	uint64_t watch_mask_value = 0;
	unsigned int args_idx = 0;

	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_address_watch not supported on CZ\n");
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	/* Validate arguments */

	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
		(cmd_from_user == NULL))
		return -EINVAL;

	/* this is the actual buffer to work with */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	aw_info.process = p;

	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(aw_info.num_watch_points);

	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;

	/*
	 * set watch address base pointer to point on the array base
	 * within args_buff
	 */
	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];

	/* skip over the addresses buffer */
	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		status = -EINVAL;
		goto out;
	}

	watch_mask_value = (uint64_t) args_buff[args_idx];

	if (watch_mask_value > 0) {
		/*
		 * There is an array of masks.
		 * set watch mask base pointer to point on the array base
		 * within args_buff
		 */
		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];

		/* skip over the masks buffer */
		args_idx += sizeof(aw_info.watch_mask) *
				aw_info.num_watch_points;
	} else {
		/* just the NULL mask, set to NULL and skip over it */
		aw_info.watch_mask = NULL;
		args_idx += sizeof(aw_info.watch_mask);
	}

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		status = -EINVAL;
		goto out;
	}

	/* Currently HSA Event is not supported for DBG */
	aw_info.watch_event = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);

	mutex_unlock(kfd_get_dbgmgr_mutex());

out:
	kfree(args_buff);

	return status;
}

/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_wave_control_args *args = data;
	struct kfd_dev *dev;
	struct dbg_wave_control_info wac_info;
	unsigned char *args_buff;
	uint32_t computed_buff_size;
	long status;
	void __user *cmd_from_user;
	unsigned int args_idx = 0;

	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));

	/* we use compact form, independent of the packing attribute value */
	computed_buff_size = sizeof(*args) +
				sizeof(wac_info.mode) +
				sizeof(wac_info.operand) +
				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
				sizeof(wac_info.dbgWave_msg.MemoryVA) +
				sizeof(wac_info.trapId);

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	/* input size must match the computed "compact" size */
	if (args->buf_size_in_bytes != computed_buff_size) {
		pr_debug("size mismatch, computed : actual %u : %u\n",
				computed_buff_size, args->buf_size_in_bytes);
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	if (cmd_from_user == NULL)
		return -EINVAL;

	/* copy the entire buffer from user */

	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	/* move ptr to the start of the "pay-load" area */
	wac_info.process = p;

	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.operand);

	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.mode);

	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.trapId);

	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
					*((uint32_t *)(&args_buff[args_idx]));
	wac_info.dbgWave_msg.MemoryVA = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
			wac_info.process, wac_info.operand,
			wac_info.mode, wac_info.trapId,
			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);

	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);

	pr_debug("Returned status of dbg manager is %ld\n", status);

	mutex_unlock(kfd_get_dbgmgr_mutex());

	kfree(args_buff);

	return status;
}
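
/*
 * Illustrative sketch only (not part of the driver): the "compact" payload
 * that kfd_ioctl_dbg_wave_control() above expects to follow the ioctl args
 * in the user buffer. The struct and field names below are a hypothetical
 * user-space mirror, assuming a 64-bit build and 4-byte enums:
 *
 *	struct wave_control_payload {
 *		uint32_t operand;	 // enum HSA_DBG_WAVEOP
 *		uint32_t mode;		 // enum HSA_DBG_WAVEMODE
 *		uint32_t trap_id;
 *		uint32_t wave_msg_value; // WaveMsgInfoGen2.Value
 *		uint64_t memory_va;	 // counted in computed_buff_size,
 *					 // but ignored by the parser above
 *	} __attribute__((packed));
 *
 * args->buf_size_in_bytes must equal the size of the ioctl args struct plus
 * the size of this payload; any other size is rejected with -EINVAL before
 * the payload is parsed.
 */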

static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_dev *dev;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev)
		/* Reading GPU clock counter from KGD */
		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
	else
		/* Node without GPU resource */
		args->gpu_clock_counter = 0;

	/* No access to rdtsc. Using raw monotonic time */
	args->cpu_clock_counter = ktime_get_raw_ns();
	args->system_clock_counter = ktime_get_boottime_ns();

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;

	return 0;
}


static int kfd_ioctl_get_process_apertures(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_args *args = data;
	struct kfd_process_device_apertures *pAperture;
	struct kfd_process_device *pdd;

	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);

	args->num_of_nodes = 0;

	mutex_lock(&p->mutex);

	/* if the process-device list isn't empty */
	if (kfd_has_process_device_data(p)) {
		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			pAperture =
				&args->process_apertures[args->num_of_nodes];
			pAperture->gpu_id = pdd->dev->id;
			pAperture->lds_base = pdd->lds_base;
			pAperture->lds_limit = pdd->lds_limit;
			pAperture->gpuvm_base = pdd->gpuvm_base;
			pAperture->gpuvm_limit = pdd->gpuvm_limit;
			pAperture->scratch_base = pdd->scratch_base;
			pAperture->scratch_limit = pdd->scratch_limit;

			dev_dbg(kfd_device,
				"node id %u\n", args->num_of_nodes);
			dev_dbg(kfd_device,
				"gpu id %u\n", pdd->dev->id);
			dev_dbg(kfd_device,
				"lds_base %llX\n", pdd->lds_base);
			dev_dbg(kfd_device,
				"lds_limit %llX\n", pdd->lds_limit);
			dev_dbg(kfd_device,
				"gpuvm_base %llX\n", pdd->gpuvm_base);
			dev_dbg(kfd_device,
				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
			dev_dbg(kfd_device,
				"scratch_base %llX\n", pdd->scratch_base);
			dev_dbg(kfd_device,
				"scratch_limit %llX\n", pdd->scratch_limit);

			args->num_of_nodes++;

			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
	}

	mutex_unlock(&p->mutex);

	return 0;
}

static int kfd_ioctl_get_process_apertures_new(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_new_args *args = data;
	struct kfd_process_device_apertures *pa;
	struct kfd_process_device *pdd;
	uint32_t nodes = 0;
	int ret;

	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);

	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can allocate
		 * sufficient memory
		 */
		mutex_lock(&p->mutex);

		if (!kfd_has_process_device_data(p))
			goto out_unlock;

		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			args->num_of_nodes++;
			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd);

		goto out_unlock;
	}

	/* Fill in process-aperture information for all available
	 * nodes, but not more than args->num_of_nodes as that is
	 * the amount of memory allocated by user
	 */
	pa = kzalloc((sizeof(struct
kfd_process_device_apertures) * 958 args->num_of_nodes), GFP_KERNEL); 959 if (!pa) 960 return -ENOMEM; 961 962 mutex_lock(&p->mutex); 963 964 if (!kfd_has_process_device_data(p)) { 965 args->num_of_nodes = 0; 966 kfree(pa); 967 goto out_unlock; 968 } 969 970 /* Run over all pdd of the process */ 971 pdd = kfd_get_first_process_device_data(p); 972 do { 973 pa[nodes].gpu_id = pdd->dev->id; 974 pa[nodes].lds_base = pdd->lds_base; 975 pa[nodes].lds_limit = pdd->lds_limit; 976 pa[nodes].gpuvm_base = pdd->gpuvm_base; 977 pa[nodes].gpuvm_limit = pdd->gpuvm_limit; 978 pa[nodes].scratch_base = pdd->scratch_base; 979 pa[nodes].scratch_limit = pdd->scratch_limit; 980 981 dev_dbg(kfd_device, 982 "gpu id %u\n", pdd->dev->id); 983 dev_dbg(kfd_device, 984 "lds_base %llX\n", pdd->lds_base); 985 dev_dbg(kfd_device, 986 "lds_limit %llX\n", pdd->lds_limit); 987 dev_dbg(kfd_device, 988 "gpuvm_base %llX\n", pdd->gpuvm_base); 989 dev_dbg(kfd_device, 990 "gpuvm_limit %llX\n", pdd->gpuvm_limit); 991 dev_dbg(kfd_device, 992 "scratch_base %llX\n", pdd->scratch_base); 993 dev_dbg(kfd_device, 994 "scratch_limit %llX\n", pdd->scratch_limit); 995 nodes++; 996 997 pdd = kfd_get_next_process_device_data(p, pdd); 998 } while (pdd && (nodes < args->num_of_nodes)); 999 mutex_unlock(&p->mutex); 1000 1001 args->num_of_nodes = nodes; 1002 ret = copy_to_user( 1003 (void __user *)args->kfd_process_device_apertures_ptr, 1004 pa, 1005 (nodes * sizeof(struct kfd_process_device_apertures))); 1006 kfree(pa); 1007 return ret ? -EFAULT : 0; 1008 1009 out_unlock: 1010 mutex_unlock(&p->mutex); 1011 return 0; 1012 } 1013 1014 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, 1015 void *data) 1016 { 1017 struct kfd_ioctl_create_event_args *args = data; 1018 int err; 1019 1020 /* For dGPUs the event page is allocated in user mode. The 1021 * handle is passed to KFD with the first call to this IOCTL 1022 * through the event_page_offset field. 
1023 */ 1024 if (args->event_page_offset) { 1025 struct kfd_dev *kfd; 1026 struct kfd_process_device *pdd; 1027 void *mem, *kern_addr; 1028 uint64_t size; 1029 1030 if (p->signal_page) { 1031 pr_err("Event page is already set\n"); 1032 return -EINVAL; 1033 } 1034 1035 kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset)); 1036 if (!kfd) { 1037 pr_err("Getting device by id failed in %s\n", __func__); 1038 return -EINVAL; 1039 } 1040 1041 mutex_lock(&p->mutex); 1042 pdd = kfd_bind_process_to_device(kfd, p); 1043 if (IS_ERR(pdd)) { 1044 err = PTR_ERR(pdd); 1045 goto out_unlock; 1046 } 1047 1048 mem = kfd_process_device_translate_handle(pdd, 1049 GET_IDR_HANDLE(args->event_page_offset)); 1050 if (!mem) { 1051 pr_err("Can't find BO, offset is 0x%llx\n", 1052 args->event_page_offset); 1053 err = -EINVAL; 1054 goto out_unlock; 1055 } 1056 mutex_unlock(&p->mutex); 1057 1058 err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd, 1059 mem, &kern_addr, &size); 1060 if (err) { 1061 pr_err("Failed to map event page to kernel\n"); 1062 return err; 1063 } 1064 1065 err = kfd_event_page_set(p, kern_addr, size); 1066 if (err) { 1067 pr_err("Failed to set event page\n"); 1068 return err; 1069 } 1070 } 1071 1072 err = kfd_event_create(filp, p, args->event_type, 1073 args->auto_reset != 0, args->node_id, 1074 &args->event_id, &args->event_trigger_data, 1075 &args->event_page_offset, 1076 &args->event_slot_index); 1077 1078 return err; 1079 1080 out_unlock: 1081 mutex_unlock(&p->mutex); 1082 return err; 1083 } 1084 1085 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p, 1086 void *data) 1087 { 1088 struct kfd_ioctl_destroy_event_args *args = data; 1089 1090 return kfd_event_destroy(p, args->event_id); 1091 } 1092 1093 static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p, 1094 void *data) 1095 { 1096 struct kfd_ioctl_set_event_args *args = data; 1097 1098 return kfd_set_event(p, args->event_id); 1099 } 1100 1101 static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p, 1102 void *data) 1103 { 1104 struct kfd_ioctl_reset_event_args *args = data; 1105 1106 return kfd_reset_event(p, args->event_id); 1107 } 1108 1109 static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, 1110 void *data) 1111 { 1112 struct kfd_ioctl_wait_events_args *args = data; 1113 int err; 1114 1115 err = kfd_wait_on_events(p, args->num_events, 1116 (void __user *)args->events_ptr, 1117 (args->wait_for_all != 0), 1118 args->timeout, &args->wait_result); 1119 1120 return err; 1121 } 1122 static int kfd_ioctl_set_scratch_backing_va(struct file *filep, 1123 struct kfd_process *p, void *data) 1124 { 1125 struct kfd_ioctl_set_scratch_backing_va_args *args = data; 1126 struct kfd_process_device *pdd; 1127 struct kfd_dev *dev; 1128 long err; 1129 1130 dev = kfd_device_by_id(args->gpu_id); 1131 if (!dev) 1132 return -EINVAL; 1133 1134 mutex_lock(&p->mutex); 1135 1136 pdd = kfd_bind_process_to_device(dev, p); 1137 if (IS_ERR(pdd)) { 1138 err = PTR_ERR(pdd); 1139 goto bind_process_to_device_fail; 1140 } 1141 1142 pdd->qpd.sh_hidden_private_base = args->va_addr; 1143 1144 mutex_unlock(&p->mutex); 1145 1146 if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS && 1147 pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va) 1148 dev->kfd2kgd->set_scratch_backing_va( 1149 dev->kgd, args->va_addr, pdd->qpd.vmid); 1150 1151 return 0; 1152 1153 bind_process_to_device_fail: 1154 mutex_unlock(&p->mutex); 1155 return err; 1156 } 1157 1158 static int 
kfd_ioctl_get_tile_config(struct file *filep, 1159 struct kfd_process *p, void *data) 1160 { 1161 struct kfd_ioctl_get_tile_config_args *args = data; 1162 struct kfd_dev *dev; 1163 struct tile_config config; 1164 int err = 0; 1165 1166 dev = kfd_device_by_id(args->gpu_id); 1167 if (!dev) 1168 return -EINVAL; 1169 1170 dev->kfd2kgd->get_tile_config(dev->kgd, &config); 1171 1172 args->gb_addr_config = config.gb_addr_config; 1173 args->num_banks = config.num_banks; 1174 args->num_ranks = config.num_ranks; 1175 1176 if (args->num_tile_configs > config.num_tile_configs) 1177 args->num_tile_configs = config.num_tile_configs; 1178 err = copy_to_user((void __user *)args->tile_config_ptr, 1179 config.tile_config_ptr, 1180 args->num_tile_configs * sizeof(uint32_t)); 1181 if (err) { 1182 args->num_tile_configs = 0; 1183 return -EFAULT; 1184 } 1185 1186 if (args->num_macro_tile_configs > config.num_macro_tile_configs) 1187 args->num_macro_tile_configs = 1188 config.num_macro_tile_configs; 1189 err = copy_to_user((void __user *)args->macro_tile_config_ptr, 1190 config.macro_tile_config_ptr, 1191 args->num_macro_tile_configs * sizeof(uint32_t)); 1192 if (err) { 1193 args->num_macro_tile_configs = 0; 1194 return -EFAULT; 1195 } 1196 1197 return 0; 1198 } 1199 1200 static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p, 1201 void *data) 1202 { 1203 struct kfd_ioctl_acquire_vm_args *args = data; 1204 struct kfd_process_device *pdd; 1205 struct kfd_dev *dev; 1206 struct file *drm_file; 1207 int ret; 1208 1209 dev = kfd_device_by_id(args->gpu_id); 1210 if (!dev) 1211 return -EINVAL; 1212 1213 drm_file = fget(args->drm_fd); 1214 if (!drm_file) 1215 return -EINVAL; 1216 1217 mutex_lock(&p->mutex); 1218 1219 pdd = kfd_get_process_device_data(dev, p); 1220 if (!pdd) { 1221 ret = -EINVAL; 1222 goto err_unlock; 1223 } 1224 1225 if (pdd->drm_file) { 1226 ret = pdd->drm_file == drm_file ? 
0 : -EBUSY; 1227 goto err_unlock; 1228 } 1229 1230 ret = kfd_process_device_init_vm(pdd, drm_file); 1231 if (ret) 1232 goto err_unlock; 1233 /* On success, the PDD keeps the drm_file reference */ 1234 mutex_unlock(&p->mutex); 1235 1236 return 0; 1237 1238 err_unlock: 1239 mutex_unlock(&p->mutex); 1240 fput(drm_file); 1241 return ret; 1242 } 1243 1244 bool kfd_dev_is_large_bar(struct kfd_dev *dev) 1245 { 1246 struct kfd_local_mem_info mem_info; 1247 1248 if (debug_largebar) { 1249 pr_debug("Simulate large-bar allocation on non large-bar machine\n"); 1250 return true; 1251 } 1252 1253 if (dev->device_info->needs_iommu_device) 1254 return false; 1255 1256 amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info); 1257 if (mem_info.local_mem_size_private == 0 && 1258 mem_info.local_mem_size_public > 0) 1259 return true; 1260 return false; 1261 } 1262 1263 static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, 1264 struct kfd_process *p, void *data) 1265 { 1266 struct kfd_ioctl_alloc_memory_of_gpu_args *args = data; 1267 struct kfd_process_device *pdd; 1268 void *mem; 1269 struct kfd_dev *dev; 1270 int idr_handle; 1271 long err; 1272 uint64_t offset = args->mmap_offset; 1273 uint32_t flags = args->flags; 1274 1275 if (args->size == 0) 1276 return -EINVAL; 1277 1278 dev = kfd_device_by_id(args->gpu_id); 1279 if (!dev) 1280 return -EINVAL; 1281 1282 if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) && 1283 (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) && 1284 !kfd_dev_is_large_bar(dev)) { 1285 pr_err("Alloc host visible vram on small bar is not allowed\n"); 1286 return -EINVAL; 1287 } 1288 1289 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { 1290 if (args->size != kfd_doorbell_process_slice(dev)) 1291 return -EINVAL; 1292 offset = kfd_get_process_doorbells(dev, p); 1293 } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { 1294 if (args->size != PAGE_SIZE) 1295 return -EINVAL; 1296 offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); 1297 if (!offset) 1298 return -ENOMEM; 1299 } 1300 1301 mutex_lock(&p->mutex); 1302 1303 pdd = kfd_bind_process_to_device(dev, p); 1304 if (IS_ERR(pdd)) { 1305 err = PTR_ERR(pdd); 1306 goto err_unlock; 1307 } 1308 1309 err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( 1310 dev->kgd, args->va_addr, args->size, 1311 pdd->vm, (struct kgd_mem **) &mem, &offset, 1312 flags); 1313 1314 if (err) 1315 goto err_unlock; 1316 1317 idr_handle = kfd_process_device_create_obj_handle(pdd, mem); 1318 if (idr_handle < 0) { 1319 err = -EFAULT; 1320 goto err_free; 1321 } 1322 1323 mutex_unlock(&p->mutex); 1324 1325 args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); 1326 args->mmap_offset = offset; 1327 1328 /* MMIO is mapped through kfd device 1329 * Generate a kfd mmap offset 1330 */ 1331 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) 1332 args->mmap_offset = KFD_MMAP_TYPE_MMIO 1333 | KFD_MMAP_GPU_ID(args->gpu_id); 1334 1335 return 0; 1336 1337 err_free: 1338 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); 1339 err_unlock: 1340 mutex_unlock(&p->mutex); 1341 return err; 1342 } 1343 1344 static int kfd_ioctl_free_memory_of_gpu(struct file *filep, 1345 struct kfd_process *p, void *data) 1346 { 1347 struct kfd_ioctl_free_memory_of_gpu_args *args = data; 1348 struct kfd_process_device *pdd; 1349 void *mem; 1350 struct kfd_dev *dev; 1351 int ret; 1352 1353 dev = kfd_device_by_id(GET_GPU_ID(args->handle)); 1354 if (!dev) 1355 return -EINVAL; 1356 1357 mutex_lock(&p->mutex); 1358 1359 pdd = kfd_get_process_device_data(dev, p); 1360 if (!pdd) { 1361 pr_err("Process 
device data doesn't exist\n"); 1362 ret = -EINVAL; 1363 goto err_unlock; 1364 } 1365 1366 mem = kfd_process_device_translate_handle( 1367 pdd, GET_IDR_HANDLE(args->handle)); 1368 if (!mem) { 1369 ret = -EINVAL; 1370 goto err_unlock; 1371 } 1372 1373 ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, 1374 (struct kgd_mem *)mem); 1375 1376 /* If freeing the buffer failed, leave the handle in place for 1377 * clean-up during process tear-down. 1378 */ 1379 if (!ret) 1380 kfd_process_device_remove_obj_handle( 1381 pdd, GET_IDR_HANDLE(args->handle)); 1382 1383 err_unlock: 1384 mutex_unlock(&p->mutex); 1385 return ret; 1386 } 1387 1388 static int kfd_ioctl_map_memory_to_gpu(struct file *filep, 1389 struct kfd_process *p, void *data) 1390 { 1391 struct kfd_ioctl_map_memory_to_gpu_args *args = data; 1392 struct kfd_process_device *pdd, *peer_pdd; 1393 void *mem; 1394 struct kfd_dev *dev, *peer; 1395 long err = 0; 1396 int i; 1397 uint32_t *devices_arr = NULL; 1398 1399 dev = kfd_device_by_id(GET_GPU_ID(args->handle)); 1400 if (!dev) 1401 return -EINVAL; 1402 1403 if (!args->n_devices) { 1404 pr_debug("Device IDs array empty\n"); 1405 return -EINVAL; 1406 } 1407 if (args->n_success > args->n_devices) { 1408 pr_debug("n_success exceeds n_devices\n"); 1409 return -EINVAL; 1410 } 1411 1412 devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), 1413 GFP_KERNEL); 1414 if (!devices_arr) 1415 return -ENOMEM; 1416 1417 err = copy_from_user(devices_arr, 1418 (void __user *)args->device_ids_array_ptr, 1419 args->n_devices * sizeof(*devices_arr)); 1420 if (err != 0) { 1421 err = -EFAULT; 1422 goto copy_from_user_failed; 1423 } 1424 1425 mutex_lock(&p->mutex); 1426 1427 pdd = kfd_bind_process_to_device(dev, p); 1428 if (IS_ERR(pdd)) { 1429 err = PTR_ERR(pdd); 1430 goto bind_process_to_device_failed; 1431 } 1432 1433 mem = kfd_process_device_translate_handle(pdd, 1434 GET_IDR_HANDLE(args->handle)); 1435 if (!mem) { 1436 err = -ENOMEM; 1437 goto get_mem_obj_from_handle_failed; 1438 } 1439 1440 for (i = args->n_success; i < args->n_devices; i++) { 1441 peer = kfd_device_by_id(devices_arr[i]); 1442 if (!peer) { 1443 pr_debug("Getting device by id failed for 0x%x\n", 1444 devices_arr[i]); 1445 err = -EINVAL; 1446 goto get_mem_obj_from_handle_failed; 1447 } 1448 1449 peer_pdd = kfd_bind_process_to_device(peer, p); 1450 if (IS_ERR(peer_pdd)) { 1451 err = PTR_ERR(peer_pdd); 1452 goto get_mem_obj_from_handle_failed; 1453 } 1454 err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu( 1455 peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); 1456 if (err) { 1457 pr_err("Failed to map to gpu %d/%d\n", 1458 i, args->n_devices); 1459 goto map_memory_to_gpu_failed; 1460 } 1461 args->n_success = i+1; 1462 } 1463 1464 mutex_unlock(&p->mutex); 1465 1466 err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true); 1467 if (err) { 1468 pr_debug("Sync memory failed, wait interrupted by user signal\n"); 1469 goto sync_memory_failed; 1470 } 1471 1472 /* Flush TLBs after waiting for the page table updates to complete */ 1473 for (i = 0; i < args->n_devices; i++) { 1474 peer = kfd_device_by_id(devices_arr[i]); 1475 if (WARN_ON_ONCE(!peer)) 1476 continue; 1477 peer_pdd = kfd_get_process_device_data(peer, p); 1478 if (WARN_ON_ONCE(!peer_pdd)) 1479 continue; 1480 kfd_flush_tlb(peer_pdd); 1481 } 1482 1483 kfree(devices_arr); 1484 1485 return err; 1486 1487 bind_process_to_device_failed: 1488 get_mem_obj_from_handle_failed: 1489 map_memory_to_gpu_failed: 1490 mutex_unlock(&p->mutex); 1491 copy_from_user_failed: 1492 
sync_memory_failed: 1493 kfree(devices_arr); 1494 1495 return err; 1496 } 1497 1498 static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, 1499 struct kfd_process *p, void *data) 1500 { 1501 struct kfd_ioctl_unmap_memory_from_gpu_args *args = data; 1502 struct kfd_process_device *pdd, *peer_pdd; 1503 void *mem; 1504 struct kfd_dev *dev, *peer; 1505 long err = 0; 1506 uint32_t *devices_arr = NULL, i; 1507 1508 dev = kfd_device_by_id(GET_GPU_ID(args->handle)); 1509 if (!dev) 1510 return -EINVAL; 1511 1512 if (!args->n_devices) { 1513 pr_debug("Device IDs array empty\n"); 1514 return -EINVAL; 1515 } 1516 if (args->n_success > args->n_devices) { 1517 pr_debug("n_success exceeds n_devices\n"); 1518 return -EINVAL; 1519 } 1520 1521 devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), 1522 GFP_KERNEL); 1523 if (!devices_arr) 1524 return -ENOMEM; 1525 1526 err = copy_from_user(devices_arr, 1527 (void __user *)args->device_ids_array_ptr, 1528 args->n_devices * sizeof(*devices_arr)); 1529 if (err != 0) { 1530 err = -EFAULT; 1531 goto copy_from_user_failed; 1532 } 1533 1534 mutex_lock(&p->mutex); 1535 1536 pdd = kfd_get_process_device_data(dev, p); 1537 if (!pdd) { 1538 err = -EINVAL; 1539 goto bind_process_to_device_failed; 1540 } 1541 1542 mem = kfd_process_device_translate_handle(pdd, 1543 GET_IDR_HANDLE(args->handle)); 1544 if (!mem) { 1545 err = -ENOMEM; 1546 goto get_mem_obj_from_handle_failed; 1547 } 1548 1549 for (i = args->n_success; i < args->n_devices; i++) { 1550 peer = kfd_device_by_id(devices_arr[i]); 1551 if (!peer) { 1552 err = -EINVAL; 1553 goto get_mem_obj_from_handle_failed; 1554 } 1555 1556 peer_pdd = kfd_get_process_device_data(peer, p); 1557 if (!peer_pdd) { 1558 err = -ENODEV; 1559 goto get_mem_obj_from_handle_failed; 1560 } 1561 err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( 1562 peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); 1563 if (err) { 1564 pr_err("Failed to unmap from gpu %d/%d\n", 1565 i, args->n_devices); 1566 goto unmap_memory_from_gpu_failed; 1567 } 1568 args->n_success = i+1; 1569 } 1570 kfree(devices_arr); 1571 1572 mutex_unlock(&p->mutex); 1573 1574 return 0; 1575 1576 bind_process_to_device_failed: 1577 get_mem_obj_from_handle_failed: 1578 unmap_memory_from_gpu_failed: 1579 mutex_unlock(&p->mutex); 1580 copy_from_user_failed: 1581 kfree(devices_arr); 1582 return err; 1583 } 1584 1585 static int kfd_ioctl_get_dmabuf_info(struct file *filep, 1586 struct kfd_process *p, void *data) 1587 { 1588 struct kfd_ioctl_get_dmabuf_info_args *args = data; 1589 struct kfd_dev *dev = NULL; 1590 struct kgd_dev *dma_buf_kgd; 1591 void *metadata_buffer = NULL; 1592 uint32_t flags; 1593 unsigned int i; 1594 int r; 1595 1596 /* Find a KFD GPU device that supports the get_dmabuf_info query */ 1597 for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++) 1598 if (dev) 1599 break; 1600 if (!dev) 1601 return -EINVAL; 1602 1603 if (args->metadata_ptr) { 1604 metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL); 1605 if (!metadata_buffer) 1606 return -ENOMEM; 1607 } 1608 1609 /* Get dmabuf info from KGD */ 1610 r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd, 1611 &dma_buf_kgd, &args->size, 1612 metadata_buffer, args->metadata_size, 1613 &args->metadata_size, &flags); 1614 if (r) 1615 goto exit; 1616 1617 /* Reverse-lookup gpu_id from kgd pointer */ 1618 dev = kfd_device_by_kgd(dma_buf_kgd); 1619 if (!dev) { 1620 r = -EINVAL; 1621 goto exit; 1622 } 1623 args->gpu_id = dev->id; 1624 args->flags = flags; 1625 1626 /* Copy metadata buffer to 
user mode */ 1627 if (metadata_buffer) { 1628 r = copy_to_user((void __user *)args->metadata_ptr, 1629 metadata_buffer, args->metadata_size); 1630 if (r != 0) 1631 r = -EFAULT; 1632 } 1633 1634 exit: 1635 kfree(metadata_buffer); 1636 1637 return r; 1638 } 1639 1640 static int kfd_ioctl_import_dmabuf(struct file *filep, 1641 struct kfd_process *p, void *data) 1642 { 1643 struct kfd_ioctl_import_dmabuf_args *args = data; 1644 struct kfd_process_device *pdd; 1645 struct dma_buf *dmabuf; 1646 struct kfd_dev *dev; 1647 int idr_handle; 1648 uint64_t size; 1649 void *mem; 1650 int r; 1651 1652 dev = kfd_device_by_id(args->gpu_id); 1653 if (!dev) 1654 return -EINVAL; 1655 1656 dmabuf = dma_buf_get(args->dmabuf_fd); 1657 if (IS_ERR(dmabuf)) 1658 return PTR_ERR(dmabuf); 1659 1660 mutex_lock(&p->mutex); 1661 1662 pdd = kfd_bind_process_to_device(dev, p); 1663 if (IS_ERR(pdd)) { 1664 r = PTR_ERR(pdd); 1665 goto err_unlock; 1666 } 1667 1668 r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf, 1669 args->va_addr, pdd->vm, 1670 (struct kgd_mem **)&mem, &size, 1671 NULL); 1672 if (r) 1673 goto err_unlock; 1674 1675 idr_handle = kfd_process_device_create_obj_handle(pdd, mem); 1676 if (idr_handle < 0) { 1677 r = -EFAULT; 1678 goto err_free; 1679 } 1680 1681 mutex_unlock(&p->mutex); 1682 1683 args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); 1684 1685 return 0; 1686 1687 err_free: 1688 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); 1689 err_unlock: 1690 mutex_unlock(&p->mutex); 1691 return r; 1692 } 1693 1694 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ 1695 [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ 1696 .cmd_drv = 0, .name = #ioctl} 1697 1698 /** Ioctl table */ 1699 static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { 1700 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION, 1701 kfd_ioctl_get_version, 0), 1702 1703 AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE, 1704 kfd_ioctl_create_queue, 0), 1705 1706 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE, 1707 kfd_ioctl_destroy_queue, 0), 1708 1709 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY, 1710 kfd_ioctl_set_memory_policy, 0), 1711 1712 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS, 1713 kfd_ioctl_get_clock_counters, 0), 1714 1715 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES, 1716 kfd_ioctl_get_process_apertures, 0), 1717 1718 AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE, 1719 kfd_ioctl_update_queue, 0), 1720 1721 AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT, 1722 kfd_ioctl_create_event, 0), 1723 1724 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT, 1725 kfd_ioctl_destroy_event, 0), 1726 1727 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT, 1728 kfd_ioctl_set_event, 0), 1729 1730 AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT, 1731 kfd_ioctl_reset_event, 0), 1732 1733 AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS, 1734 kfd_ioctl_wait_events, 0), 1735 1736 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER, 1737 kfd_ioctl_dbg_register, 0), 1738 1739 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER, 1740 kfd_ioctl_dbg_unregister, 0), 1741 1742 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH, 1743 kfd_ioctl_dbg_address_watch, 0), 1744 1745 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL, 1746 kfd_ioctl_dbg_wave_control, 0), 1747 1748 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA, 1749 kfd_ioctl_set_scratch_backing_va, 0), 1750 1751 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG, 1752 kfd_ioctl_get_tile_config, 0), 1753 1754 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER, 1755 kfd_ioctl_set_trap_handler, 0), 1756 1757 
AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, 1758 kfd_ioctl_get_process_apertures_new, 0), 1759 1760 AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM, 1761 kfd_ioctl_acquire_vm, 0), 1762 1763 AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, 1764 kfd_ioctl_alloc_memory_of_gpu, 0), 1765 1766 AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU, 1767 kfd_ioctl_free_memory_of_gpu, 0), 1768 1769 AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU, 1770 kfd_ioctl_map_memory_to_gpu, 0), 1771 1772 AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, 1773 kfd_ioctl_unmap_memory_from_gpu, 0), 1774 1775 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK, 1776 kfd_ioctl_set_cu_mask, 0), 1777 1778 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE, 1779 kfd_ioctl_get_queue_wave_state, 0), 1780 1781 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO, 1782 kfd_ioctl_get_dmabuf_info, 0), 1783 1784 AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF, 1785 kfd_ioctl_import_dmabuf, 0), 1786 1787 }; 1788 1789 #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) 1790 1791 static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) 1792 { 1793 struct kfd_process *process; 1794 amdkfd_ioctl_t *func; 1795 const struct amdkfd_ioctl_desc *ioctl = NULL; 1796 unsigned int nr = _IOC_NR(cmd); 1797 char stack_kdata[128]; 1798 char *kdata = NULL; 1799 unsigned int usize, asize; 1800 int retcode = -EINVAL; 1801 1802 if (nr >= AMDKFD_CORE_IOCTL_COUNT) 1803 goto err_i1; 1804 1805 if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) { 1806 u32 amdkfd_size; 1807 1808 ioctl = &amdkfd_ioctls[nr]; 1809 1810 amdkfd_size = _IOC_SIZE(ioctl->cmd); 1811 usize = asize = _IOC_SIZE(cmd); 1812 if (amdkfd_size > asize) 1813 asize = amdkfd_size; 1814 1815 cmd = ioctl->cmd; 1816 } else 1817 goto err_i1; 1818 1819 dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg); 1820 1821 /* Get the process struct from the filep. Only the process 1822 * that opened /dev/kfd can use the file descriptor. Child 1823 * processes need to create their own KFD device context. 
1824 */ 1825 process = filep->private_data; 1826 if (process->lead_thread != current->group_leader) { 1827 dev_dbg(kfd_device, "Using KFD FD in wrong process\n"); 1828 retcode = -EBADF; 1829 goto err_i1; 1830 } 1831 1832 /* Do not trust userspace, use our own definition */ 1833 func = ioctl->func; 1834 1835 if (unlikely(!func)) { 1836 dev_dbg(kfd_device, "no function\n"); 1837 retcode = -EINVAL; 1838 goto err_i1; 1839 } 1840 1841 if (cmd & (IOC_IN | IOC_OUT)) { 1842 if (asize <= sizeof(stack_kdata)) { 1843 kdata = stack_kdata; 1844 } else { 1845 kdata = kmalloc(asize, GFP_KERNEL); 1846 if (!kdata) { 1847 retcode = -ENOMEM; 1848 goto err_i1; 1849 } 1850 } 1851 if (asize > usize) 1852 memset(kdata + usize, 0, asize - usize); 1853 } 1854 1855 if (cmd & IOC_IN) { 1856 if (copy_from_user(kdata, (void __user *)arg, usize) != 0) { 1857 retcode = -EFAULT; 1858 goto err_i1; 1859 } 1860 } else if (cmd & IOC_OUT) { 1861 memset(kdata, 0, usize); 1862 } 1863 1864 retcode = func(filep, process, kdata); 1865 1866 if (cmd & IOC_OUT) 1867 if (copy_to_user((void __user *)arg, kdata, usize) != 0) 1868 retcode = -EFAULT; 1869 1870 err_i1: 1871 if (!ioctl) 1872 dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n", 1873 task_pid_nr(current), cmd, nr); 1874 1875 if (kdata != stack_kdata) 1876 kfree(kdata); 1877 1878 if (retcode) 1879 dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n", 1880 nr, arg, retcode); 1881 1882 return retcode; 1883 } 1884 1885 static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process, 1886 struct vm_area_struct *vma) 1887 { 1888 phys_addr_t address; 1889 int ret; 1890 1891 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1892 return -EINVAL; 1893 1894 address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); 1895 1896 vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | 1897 VM_DONTDUMP | VM_PFNMAP; 1898 1899 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 1900 1901 pr_debug("pasid 0x%x mapping mmio page\n" 1902 " target user address == 0x%08llX\n" 1903 " physical address == 0x%08llX\n" 1904 " vm_flags == 0x%04lX\n" 1905 " size == 0x%04lX\n", 1906 process->pasid, (unsigned long long) vma->vm_start, 1907 address, vma->vm_flags, PAGE_SIZE); 1908 1909 ret = io_remap_pfn_range(vma, 1910 vma->vm_start, 1911 address >> PAGE_SHIFT, 1912 PAGE_SIZE, 1913 vma->vm_page_prot); 1914 return ret; 1915 } 1916 1917 1918 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) 1919 { 1920 struct kfd_process *process; 1921 struct kfd_dev *dev = NULL; 1922 unsigned long mmap_offset; 1923 unsigned int gpu_id; 1924 1925 process = kfd_get_process(current); 1926 if (IS_ERR(process)) 1927 return PTR_ERR(process); 1928 1929 mmap_offset = vma->vm_pgoff << PAGE_SHIFT; 1930 gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset); 1931 if (gpu_id) 1932 dev = kfd_device_by_id(gpu_id); 1933 1934 switch (mmap_offset & KFD_MMAP_TYPE_MASK) { 1935 case KFD_MMAP_TYPE_DOORBELL: 1936 if (!dev) 1937 return -ENODEV; 1938 return kfd_doorbell_mmap(dev, process, vma); 1939 1940 case KFD_MMAP_TYPE_EVENTS: 1941 return kfd_event_mmap(process, vma); 1942 1943 case KFD_MMAP_TYPE_RESERVED_MEM: 1944 if (!dev) 1945 return -ENODEV; 1946 return kfd_reserved_mem_mmap(dev, process, vma); 1947 case KFD_MMAP_TYPE_MMIO: 1948 if (!dev) 1949 return -ENODEV; 1950 return kfd_mmio_mmap(dev, process, vma); 1951 } 1952 1953 return -EFAULT; 1954 } 1955
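
/*
 * Illustrative user-space sketch (not part of the driver and not compiled
 * here): the minimal sequence a process uses to reach the ioctls above,
 * assuming the UAPI header <linux/kfd_ioctl.h> is installed. Error handling
 * is trimmed for brevity.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kfd_ioctl.h>
 *
 *	int main(void)
 *	{
 *		struct kfd_ioctl_get_version_args args = {0};
 *		int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);
 *
 *		if (fd < 0)
 *			return 1;
 *		if (ioctl(fd, AMDKFD_IOC_GET_VERSION, &args) == 0)
 *			printf("KFD ioctl interface %u.%u\n",
 *			       args.major_version, args.minor_version);
 *		return 0;
 *	}
 *
 * As enforced in kfd_ioctl() above, the descriptor is only usable by the
 * process that opened it; forked children must open /dev/kfd themselves.
 */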