/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/dma-buf.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "amdgpu_amdkfd.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = kfd_ioctl,
	.open = kfd_open,
	.mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;

int kfd_chardev_init(void)
{
	int err = 0;

	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
	err = kfd_char_dev_major;
	if (err < 0)
		goto err_register_chrdev;

	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
	err = PTR_ERR(kfd_class);
	if (IS_ERR(kfd_class))
		goto err_class_create;

	kfd_device = device_create(kfd_class, NULL,
				   MKDEV(kfd_char_dev_major, 0),
				   NULL, kfd_dev_name);
	err = PTR_ERR(kfd_device);
	if (IS_ERR(kfd_device))
		goto err_device_create;

	return 0;

err_device_create:
	class_destroy(kfd_class);
err_class_create:
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
	return err;
}

void kfd_chardev_exit(void)
{
	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
	class_destroy(kfd_class);
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
}

struct device *kfd_chardev(void)
{
	return kfd_device;
}


static int kfd_open(struct inode *inode, struct file *filep)
{
	struct kfd_process *process;
	bool is_32bit_user_mode;

	if (iminor(inode) != 0)
		return -ENODEV;

	is_32bit_user_mode = in_compat_syscall();

	if (is_32bit_user_mode) {
		dev_warn(kfd_device,
			"Process %d (32-bit) failed to open /dev/kfd\n"
			"32-bit processes are not supported by amdkfd\n",
			current->pid);
		return -EPERM;
	}

	process = kfd_create_process(filep);
	if (IS_ERR(process))
		return PTR_ERR(process);

	if (kfd_is_locked())
		return -EAGAIN;

	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
		process->pasid, process->is_32bit_user_mode);

	return 0;
}

static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_get_version_args *args = data;

	args->major_version = KFD_IOCTL_MAJOR_VERSION;
	args->minor_version = KFD_IOCTL_MINOR_VERSION;

	return 0;
}

static int set_queue_properties_from_user(struct queue_properties *q_properties,
				struct kfd_ioctl_create_queue_args *args)
{
	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok((const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (!access_ok((const void __user *) args->read_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access read pointer\n");
		return -EFAULT;
	}

	if (!access_ok((const void __user *) args->write_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access write pointer\n");
		return -EFAULT;
	}

	if (args->eop_buffer_address &&
		!access_ok((const void __user *) args->eop_buffer_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access eop buffer");
		return -EFAULT;
	}

	if (args->ctx_save_restore_address &&
		!access_ok((const void __user *) args->ctx_save_restore_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access ctx save restore buffer");
		return -EFAULT;
	}

	q_properties->is_interop = false;
	q_properties->queue_percent = args->queue_percentage;
	q_properties->priority = args->queue_priority;
	q_properties->queue_address = args->ring_base_address;
	q_properties->queue_size = args->ring_size;
	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
	q_properties->ctx_save_restore_area_address =
			args->ctx_save_restore_address;
	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
	q_properties->ctl_stack_size = args->ctl_stack_size;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
		q_properties->type = KFD_QUEUE_TYPE_SDMA;
	else
		return -ENOTSUPP;

	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->format = KFD_QUEUE_FORMAT_AQL;
	else
		q_properties->format = KFD_QUEUE_FORMAT_PM4;

	pr_debug("Queue Percentage: %d, %d\n",
			q_properties->queue_percent, args->queue_percentage);

	pr_debug("Queue Priority: %d, %d\n",
			q_properties->priority, args->queue_priority);

	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
			q_properties->queue_address, args->ring_base_address);

	pr_debug("Queue Size: 0x%llX, %u\n",
			q_properties->queue_size, args->ring_size);

	pr_debug("Queue r/w Pointers: %px, %px\n",
			q_properties->read_ptr,
			q_properties->write_ptr);

	pr_debug("Queue Format: %d\n", q_properties->format);

	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

	pr_debug("Queue CTX save area: 0x%llX\n",
			q_properties->ctx_save_restore_area_address);

	return 0;
}

static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_queue_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	pr_debug("Creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, args);
	if (err)
		return err;

	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
	dev = kfd_device_by_id(args->gpu_id);
	if (!dev) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto err_bind_process;
	}

	pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
	if (err != 0)
		goto err_create_queue;

	args->queue_id = queue_id;


	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
	args->doorbell_offset <<= PAGE_SHIFT;
	if (KFD_IS_SOC15(dev->device_info->asic_family))
		/* On SOC15 ASICs, doorbell allocation must be
		 * per-device, and independent from the per-process
		 * queue_id. Return the doorbell offset within the
		 * doorbell aperture to user mode.
		 */
		args->doorbell_offset |= q_properties.doorbell_off;

	mutex_unlock(&p->mutex);

	pr_debug("Queue id %d was created successfully\n", args->queue_id);

	pr_debug("Ring buffer address == 0x%016llX\n",
			args->ring_base_address);

	pr_debug("Read ptr address == 0x%016llX\n",
			args->read_pointer_address);

	pr_debug("Write ptr address == 0x%016llX\n",
			args->write_pointer_address);

	return 0;

err_create_queue:
err_bind_process:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_destroy_queue_args *args = data;

	pr_debug("Destroying queue id %d for pasid %d\n",
				args->queue_id,
				p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_destroy_queue(&p->pqm, args->queue_id);

	mutex_unlock(&p->mutex);
	return retval;
}

static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_update_queue_args *args = data;
	struct queue_properties properties;

	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok((const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	properties.queue_address = args->ring_base_address;
	properties.queue_size = args->ring_size;
	properties.queue_percent = args->queue_percentage;
	properties.priority = args->queue_priority;

	pr_debug("Updating queue id %d for pasid %d\n",
			args->queue_id, p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	return retval;
}

static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	const int max_num_cus = 1024;
	struct kfd_ioctl_set_cu_mask_args *args = data;
	struct queue_properties properties;
	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);

	if ((args->num_cu_mask % 32) != 0) {
		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
				args->num_cu_mask);
		return -EINVAL;
	}

	properties.cu_mask_count = args->num_cu_mask;
	if (properties.cu_mask_count == 0) {
		pr_debug("CU mask cannot be 0");
		return -EINVAL;
	}

	/* To prevent an unreasonably large CU mask size, set an arbitrary
	 * limit of max_num_cus bits. Any CU mask bits past max_num_cus are
	 * simply dropped and only the first max_num_cus bits are used.
	 */
	if (properties.cu_mask_count > max_num_cus) {
		pr_debug("CU mask cannot be greater than 1024 bits");
		properties.cu_mask_count = max_num_cus;
		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
	}

	properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
	if (!properties.cu_mask)
		return -ENOMEM;

	retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
	if (retval) {
		pr_debug("Could not copy CU mask from userspace");
		kfree(properties.cu_mask);
		return -EFAULT;
	}

	mutex_lock(&p->mutex);

	retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	if (retval)
		kfree(properties.cu_mask);

	return retval;
}

static int kfd_ioctl_get_queue_wave_state(struct file *filep,
					  struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_queue_wave_state_args *args = data;
	int r;

	mutex_lock(&p->mutex);

	r = pqm_get_wave_state(&p->pqm, args->queue_id,
			       (void __user *)args->ctl_stack_address,
			       &args->ctl_stack_used_size,
			       &args->save_area_used_size);

	mutex_unlock(&p->mutex);

	return r;
}

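/*
 * Set the default and alternate cache policies (coherent or noncoherent)
 * for this process on the given device, together with the alternate
 * aperture range the alternate policy applies to. The actual programming
 * is delegated to the device queue manager.
 */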
static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_memory_policy_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
			 ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		   ? cache_policy_coherent : cache_policy_noncoherent;

	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_set_trap_handler(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	if (dev->dqm->ops.set_trap_handler(dev->dqm,
					&pdd->qpd,
					args->tba_addr,
					args->tma_addr))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_register_args *args = data;
	struct kfd_dev *dev;
	struct kfd_dbgmgr *dbgmgr_ptr;
	struct kfd_process_device *pdd;
	bool create_ok;
	long status = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);
	mutex_lock(kfd_get_dbgmgr_mutex());

	/*
	 * Make sure that we have a pdd, in case this is the first queue
	 * created for this process.
	 */
	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		status = PTR_ERR(pdd);
		goto out;
	}

	if (!dev->dbgmgr) {
		/* In case of a legal call, we have no dbgmgr yet */
		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
		if (create_ok) {
			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
			if (status != 0)
				kfd_dbgmgr_destroy(dbgmgr_ptr);
			else
				dev->dbgmgr = dbgmgr_ptr;
		}
	} else {
		pr_debug("debugger already registered\n");
		status = -EINVAL;
	}

out:
	mutex_unlock(kfd_get_dbgmgr_mutex());
	mutex_unlock(&p->mutex);

	return status;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_unregister_args *args = data;
	struct kfd_dev *dev;
	long status;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev || !dev->dbgmgr)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
	if (!status) {
		kfd_dbgmgr_destroy(dev->dbgmgr);
		dev->dbgmgr = NULL;
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	return status;
}

/*
 * Parse and generate a variable size data structure for address watch.
 * The total size of the buffer and the number of watch points are limited
 * in order to prevent kernel abuse. (This has no bearing on the much
 * smaller HW limitation, which is enforced by the dbgdev module.)
 * Please also note that the watch addresses themselves are not "copied
 * from user", since they are set into the HW in user mode values.
 */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_address_watch_args *args = data;
	struct kfd_dev *dev;
	struct dbg_address_watch_info aw_info;
	unsigned char *args_buff;
	long status;
	void __user *cmd_from_user;
	uint64_t watch_mask_value = 0;
	unsigned int args_idx = 0;

	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_address_watch not supported on CZ\n");
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	/* Validate arguments */

	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
		(cmd_from_user == NULL))
		return -EINVAL;

	/* this is the actual buffer to work with */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	aw_info.process = p;

	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(aw_info.num_watch_points);

	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;

	/*
	 * Set the watch address base pointer to point at the array base
	 * within args_buff.
	 */
	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];

	/* skip over the addresses buffer */
	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		status = -EINVAL;
		goto out;
	}

	watch_mask_value = (uint64_t) args_buff[args_idx];

	if (watch_mask_value > 0) {
		/*
		 * There is an array of masks.
		 * Set the watch mask base pointer to point at the array base
		 * within args_buff.
		 */
		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];

		/* skip over the masks buffer */
		args_idx += sizeof(aw_info.watch_mask) *
				aw_info.num_watch_points;
	} else {
		/* just the NULL mask, set to NULL and skip over it */
		aw_info.watch_mask = NULL;
		args_idx += sizeof(aw_info.watch_mask);
	}

	if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
		status = -EINVAL;
		goto out;
	}

	/* Currently HSA Event is not supported for DBG */
	aw_info.watch_event = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);

	mutex_unlock(kfd_get_dbgmgr_mutex());

out:
	kfree(args_buff);

	return status;
}

/* Parse and generate a fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_wave_control_args *args = data;
	struct kfd_dev *dev;
	struct dbg_wave_control_info wac_info;
	unsigned char *args_buff;
	uint32_t computed_buff_size;
	long status;
	void __user *cmd_from_user;
	unsigned int args_idx = 0;

	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));

	/* we use compact form, independent of the packing attribute value */
	computed_buff_size = sizeof(*args) +
				sizeof(wac_info.mode) +
				sizeof(wac_info.operand) +
				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
				sizeof(wac_info.dbgWave_msg.MemoryVA) +
				sizeof(wac_info.trapId);

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	/* input size must match the computed "compact" size */
	if (args->buf_size_in_bytes != computed_buff_size) {
		pr_debug("size mismatch, computed : actual %u : %u\n",
				args->buf_size_in_bytes, computed_buff_size);
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	if (cmd_from_user == NULL)
		return -EINVAL;

	/* copy the entire buffer from user */

	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	/* move ptr to the start of the "pay-load" area */
	wac_info.process = p;

	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.operand);

	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.mode);

	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.trapId);

	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
					*((uint32_t *)(&args_buff[args_idx]));
	wac_info.dbgWave_msg.MemoryVA = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
			wac_info.process, wac_info.operand,
			wac_info.mode, wac_info.trapId,
			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);

	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);

	pr_debug("Returned status of dbg manager is %ld\n", status);

	mutex_unlock(kfd_get_dbgmgr_mutex());

	kfree(args_buff);

	return status;
}

static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_dev *dev;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev)
		/* Reading GPU clock counter from KGD */
		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
	else
		/* Node without GPU resource */
		args->gpu_clock_counter = 0;

	/* No access to rdtsc. Using raw monotonic time */
	args->cpu_clock_counter = ktime_get_raw_ns();
	args->system_clock_counter = ktime_get_boot_ns();

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;

	return 0;
}


static int kfd_ioctl_get_process_apertures(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_args *args = data;
	struct kfd_process_device_apertures *pAperture;
	struct kfd_process_device *pdd;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	args->num_of_nodes = 0;

	mutex_lock(&p->mutex);

	/* if the process-device list isn't empty */
	if (kfd_has_process_device_data(p)) {
		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			pAperture =
				&args->process_apertures[args->num_of_nodes];
			pAperture->gpu_id = pdd->dev->id;
			pAperture->lds_base = pdd->lds_base;
			pAperture->lds_limit = pdd->lds_limit;
			pAperture->gpuvm_base = pdd->gpuvm_base;
			pAperture->gpuvm_limit = pdd->gpuvm_limit;
			pAperture->scratch_base = pdd->scratch_base;
			pAperture->scratch_limit = pdd->scratch_limit;

			dev_dbg(kfd_device,
				"node id %u\n", args->num_of_nodes);
			dev_dbg(kfd_device,
				"gpu id %u\n", pdd->dev->id);
			dev_dbg(kfd_device,
				"lds_base %llX\n", pdd->lds_base);
			dev_dbg(kfd_device,
				"lds_limit %llX\n", pdd->lds_limit);
			dev_dbg(kfd_device,
				"gpuvm_base %llX\n", pdd->gpuvm_base);
			dev_dbg(kfd_device,
				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
			dev_dbg(kfd_device,
				"scratch_base %llX\n", pdd->scratch_base);
			dev_dbg(kfd_device,
				"scratch_limit %llX\n", pdd->scratch_limit);

			args->num_of_nodes++;

			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
	}

	mutex_unlock(&p->mutex);

	return 0;
}

static int kfd_ioctl_get_process_apertures_new(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_new_args *args = data;
	struct kfd_process_device_apertures *pa;
	struct kfd_process_device *pdd;
	uint32_t nodes = 0;
	int ret;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can allocate
		 * sufficient memory
		 */
		mutex_lock(&p->mutex);

		if (!kfd_has_process_device_data(p))
			goto out_unlock;

		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			args->num_of_nodes++;
			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd);

		goto out_unlock;
	}

	/* Fill in process-aperture information for all available
	 * nodes, but not more than args->num_of_nodes as that is
	 * the amount of memory allocated by user
	 */
	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
			args->num_of_nodes), GFP_KERNEL);
	if (!pa)
		return -ENOMEM;

	mutex_lock(&p->mutex);

	if (!kfd_has_process_device_data(p)) {
		args->num_of_nodes = 0;
		kfree(pa);
		goto out_unlock;
	}

	/* Run over all pdd of the process */
	pdd = kfd_get_first_process_device_data(p);
	do {
		pa[nodes].gpu_id = pdd->dev->id;
		pa[nodes].lds_base = pdd->lds_base;
		pa[nodes].lds_limit = pdd->lds_limit;
		pa[nodes].gpuvm_base = pdd->gpuvm_base;
		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
		pa[nodes].scratch_base = pdd->scratch_base;
		pa[nodes].scratch_limit = pdd->scratch_limit;

		dev_dbg(kfd_device,
			"gpu id %u\n", pdd->dev->id);
		dev_dbg(kfd_device,
			"lds_base %llX\n", pdd->lds_base);
		dev_dbg(kfd_device,
			"lds_limit %llX\n", pdd->lds_limit);
		dev_dbg(kfd_device,
			"gpuvm_base %llX\n", pdd->gpuvm_base);
		dev_dbg(kfd_device,
			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
		dev_dbg(kfd_device,
			"scratch_base %llX\n", pdd->scratch_base);
		dev_dbg(kfd_device,
			"scratch_limit %llX\n", pdd->scratch_limit);
		nodes++;

		pdd = kfd_get_next_process_device_data(p, pdd);
	} while (pdd && (nodes < args->num_of_nodes));
	mutex_unlock(&p->mutex);

	args->num_of_nodes = nodes;
	ret = copy_to_user(
			(void __user *)args->kfd_process_device_apertures_ptr,
			pa,
			(nodes * sizeof(struct kfd_process_device_apertures)));
	kfree(pa);
	return ret ? -EFAULT : 0;

out_unlock:
	mutex_unlock(&p->mutex);
	return 0;
}

static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
			void *data)
{
	struct kfd_ioctl_create_event_args *args = data;
	int err;

	/* For dGPUs the event page is allocated in user mode. The
	 * handle is passed to KFD with the first call to this IOCTL
	 * through the event_page_offset field.
	 */
	if (args->event_page_offset) {
		struct kfd_dev *kfd;
		struct kfd_process_device *pdd;
		void *mem, *kern_addr;
		uint64_t size;

		if (p->signal_page) {
			pr_err("Event page is already set\n");
			return -EINVAL;
		}

		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
		if (!kfd) {
			pr_err("Getting device by id failed in %s\n", __func__);
			return -EINVAL;
		}

		mutex_lock(&p->mutex);
		pdd = kfd_bind_process_to_device(kfd, p);
		if (IS_ERR(pdd)) {
			err = PTR_ERR(pdd);
			goto out_unlock;
		}

		mem = kfd_process_device_translate_handle(pdd,
				GET_IDR_HANDLE(args->event_page_offset));
		if (!mem) {
			pr_err("Can't find BO, offset is 0x%llx\n",
			       args->event_page_offset);
			err = -EINVAL;
			goto out_unlock;
		}
		mutex_unlock(&p->mutex);

		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
						mem, &kern_addr, &size);
		if (err) {
			pr_err("Failed to map event page to kernel\n");
			return err;
		}

		err = kfd_event_page_set(p, kern_addr, size);
		if (err) {
			pr_err("Failed to set event page\n");
			return err;
		}
	}

	err = kfd_event_create(filp, p, args->event_type,
				args->auto_reset != 0, args->node_id,
				&args->event_id, &args->event_trigger_data,
				&args->event_page_offset,
				&args->event_slot_index);

	return err;

out_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_destroy_event_args *args = data;

	return kfd_event_destroy(p, args->event_id);
}

static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_set_event_args *args = data;

	return kfd_set_event(p, args->event_id);
}

static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_reset_event_args *args = data;

	return kfd_reset_event(p, args->event_id);
}

static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_wait_events_args *args = data;
	int err;

	err = kfd_wait_on_events(p, args->num_events,
			(void __user *)args->events_ptr,
			(args->wait_for_all != 0),
			args->timeout, &args->wait_result);

	return err;
}

static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	long err;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_fail;
	}

	pdd->qpd.sh_hidden_private_base = args->va_addr;

	mutex_unlock(&p->mutex);

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
	    pdd->qpd.vmid != 0)
		dev->kfd2kgd->set_scratch_backing_va(
			dev->kgd, args->va_addr, pdd->qpd.vmid);

	return 0;

bind_process_to_device_fail:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_get_tile_config(struct file *filep,
		struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_tile_config_args *args = data;
	struct kfd_dev *dev;
	struct tile_config config;
	int err = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	dev->kfd2kgd->get_tile_config(dev->kgd, &config);

	args->gb_addr_config = config.gb_addr_config;
	args->num_banks = config.num_banks;
	args->num_ranks = config.num_ranks;

	if (args->num_tile_configs > config.num_tile_configs)
		args->num_tile_configs = config.num_tile_configs;
	err = copy_to_user((void __user *)args->tile_config_ptr,
			config.tile_config_ptr,
			args->num_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_tile_configs = 0;
		return -EFAULT;
	}

	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
		args->num_macro_tile_configs =
				config.num_macro_tile_configs;
	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
			config.macro_tile_config_ptr,
			args->num_macro_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_macro_tile_configs = 0;
		return -EFAULT;
	}

	return 0;
}

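/*
 * Acquire a GPUVM for this process on the given device from an already
 * open DRM file descriptor. On success the process device data keeps the
 * drm_file reference; on any failure the reference is dropped again.
 */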
static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_acquire_vm_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	struct file *drm_file;
	int ret;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	drm_file = fget(args->drm_fd);
	if (!drm_file)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		ret = -EINVAL;
		goto err_unlock;
	}

	if (pdd->drm_file) {
		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
		goto err_unlock;
	}

	ret = kfd_process_device_init_vm(pdd, drm_file);
	if (ret)
		goto err_unlock;
	/* On success, the PDD keeps the drm_file reference */
	mutex_unlock(&p->mutex);

	return 0;

err_unlock:
	mutex_unlock(&p->mutex);
	fput(drm_file);
	return ret;
}

bool kfd_dev_is_large_bar(struct kfd_dev *dev)
{
	struct kfd_local_mem_info mem_info;

	if (debug_largebar) {
		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
		return true;
	}

	if (dev->device_info->needs_iommu_device)
		return false;

	amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
	if (mem_info.local_mem_size_private == 0 &&
			mem_info.local_mem_size_public > 0)
		return true;
	return false;
}

static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int idr_handle;
	long err;
	uint64_t offset = args->mmap_offset;
	uint32_t flags = args->flags;

	if (args->size == 0)
		return -EINVAL;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
		!kfd_dev_is_large_bar(dev)) {
		pr_err("Alloc host visible vram on small bar is not allowed\n");
		return -EINVAL;
	}

	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
		if (args->size != kfd_doorbell_process_slice(dev))
			return -EINVAL;
		offset = kfd_get_process_doorbells(dev, p);
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto err_unlock;
	}

	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		dev->kgd, args->va_addr, args->size,
		pdd->vm, (struct kgd_mem **) &mem, &offset,
		flags);

	if (err)
		goto err_unlock;

	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (idr_handle < 0) {
		err = -EFAULT;
		goto err_free;
	}

	mutex_unlock(&p->mutex);

	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
	args->mmap_offset = offset;

	return 0;

err_free:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
err_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int ret;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		ret = -EINVAL;
		goto err_unlock;
	}

	mem = kfd_process_device_translate_handle(
		pdd, GET_IDR_HANDLE(args->handle));
	if (!mem) {
		ret = -EINVAL;
		goto err_unlock;
	}

	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
						(struct kgd_mem *)mem);

	/* If freeing the buffer failed, leave the handle in place for
	 * clean-up during process tear-down.
	 */
	if (!ret)
		kfd_process_device_remove_obj_handle(
			pdd, GET_IDR_HANDLE(args->handle));

err_unlock:
	mutex_unlock(&p->mutex);
	return ret;
}

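/*
 * Map a previously allocated buffer into the GPUVM of every device listed
 * in the user-provided device ID array. args->n_success tracks how many
 * mappings already succeeded, so user mode can retry without remapping on
 * devices that were already handled. After all mappings are queued, wait
 * for the page table updates to complete and flush the TLBs.
 */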
static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	int i;
	uint32_t *devices_arr = NULL;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			pr_debug("Getting device by id failed for 0x%x\n",
				 devices_arr[i]);
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_bind_process_to_device(peer, p);
		if (IS_ERR(peer_pdd)) {
			err = PTR_ERR(peer_pdd);
			goto get_mem_obj_from_handle_failed;
		}
		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to map to gpu %d/%d\n",
			       i, args->n_devices);
			goto map_memory_to_gpu_failed;
		}
		args->n_success = i+1;
	}

	mutex_unlock(&p->mutex);

	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Flush TLBs after waiting for the page table updates to complete */
	for (i = 0; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (WARN_ON_ONCE(!peer))
			continue;
		peer_pdd = kfd_get_process_device_data(peer, p);
		if (WARN_ON_ONCE(!peer_pdd))
			continue;
		kfd_flush_tlb(peer_pdd);
	}

	kfree(devices_arr);

	return err;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
sync_memory_failed:
	kfree(devices_arr);

	return err;
}

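/*
 * Counterpart of kfd_ioctl_map_memory_to_gpu: walk the same kind of device
 * ID array and unmap the buffer from each GPU, again using args->n_success
 * to allow partial retries. Unlike the map path, no explicit sync or TLB
 * flush is done here.
 */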
static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	uint32_t *devices_arr = NULL, i;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		err = -EINVAL;
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_get_process_device_data(peer, p);
		if (!peer_pdd) {
			err = -ENODEV;
			goto get_mem_obj_from_handle_failed;
		}
		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to unmap from gpu %d/%d\n",
			       i, args->n_devices);
			goto unmap_memory_from_gpu_failed;
		}
		args->n_success = i+1;
	}
	kfree(devices_arr);

	mutex_unlock(&p->mutex);

	return 0;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
	kfree(devices_arr);
	return err;
}

static int kfd_ioctl_get_dmabuf_info(struct file *filep,
		struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_dmabuf_info_args *args = data;
	struct kfd_dev *dev = NULL;
	struct kgd_dev *dma_buf_kgd;
	void *metadata_buffer = NULL;
	uint32_t flags;
	unsigned int i;
	int r;

	/* Find a KFD GPU device that supports the get_dmabuf_info query */
	for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
		if (dev)
			break;
	if (!dev)
		return -EINVAL;

	if (args->metadata_ptr) {
		metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
		if (!metadata_buffer)
			return -ENOMEM;
	}

	/* Get dmabuf info from KGD */
	r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
					  &dma_buf_kgd, &args->size,
					  metadata_buffer, args->metadata_size,
					  &args->metadata_size, &flags);
	if (r)
		goto exit;

	/* Reverse-lookup gpu_id from kgd pointer */
	dev = kfd_device_by_kgd(dma_buf_kgd);
	if (!dev) {
		r = -EINVAL;
		goto exit;
	}
	args->gpu_id = dev->id;
	args->flags = flags;

	/* Copy metadata buffer to user mode */
	if (metadata_buffer) {
		r = copy_to_user((void __user *)args->metadata_ptr,
				 metadata_buffer, args->metadata_size);
		if (r != 0)
			r = -EFAULT;
	}

exit:
	kfree(metadata_buffer);

	return r;
}

static int kfd_ioctl_import_dmabuf(struct file *filep,
				   struct kfd_process *p, void *data)
{
	struct kfd_ioctl_import_dmabuf_args *args = data;
	struct kfd_process_device *pdd;
	struct dma_buf *dmabuf;
	struct kfd_dev *dev;
	int idr_handle;
	uint64_t size;
	void *mem;
	int r;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	dmabuf = dma_buf_get(args->dmabuf_fd);
	if (IS_ERR(dmabuf))
		return PTR_ERR(dmabuf);

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		r = PTR_ERR(pdd);
		goto err_unlock;
	}

	r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
					      args->va_addr, pdd->vm,
					      (struct kgd_mem **)&mem, &size,
					      NULL);
	if (r)
		goto err_unlock;

	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (idr_handle < 0) {
		r = -EFAULT;
		goto err_free;
	}

	mutex_unlock(&p->mutex);

	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);

	return 0;

err_free:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
err_unlock:
	mutex_unlock(&p->mutex);
	return r;
}

#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}

/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
			kfd_ioctl_get_version, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
			kfd_ioctl_create_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
			kfd_ioctl_destroy_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
			kfd_ioctl_set_memory_policy, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
			kfd_ioctl_get_clock_counters, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
			kfd_ioctl_get_process_apertures, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
			kfd_ioctl_update_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
			kfd_ioctl_create_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
			kfd_ioctl_destroy_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
			kfd_ioctl_set_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
			kfd_ioctl_reset_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
			kfd_ioctl_wait_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
			kfd_ioctl_dbg_register, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
			kfd_ioctl_dbg_unregister, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
			kfd_ioctl_dbg_address_watch, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
			kfd_ioctl_dbg_wave_control, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
			kfd_ioctl_set_scratch_backing_va, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
			kfd_ioctl_get_tile_config, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
			kfd_ioctl_set_trap_handler, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
			kfd_ioctl_get_process_apertures_new, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
			kfd_ioctl_acquire_vm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
			kfd_ioctl_alloc_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
			kfd_ioctl_free_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
			kfd_ioctl_map_memory_to_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
			kfd_ioctl_unmap_memory_from_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
			kfd_ioctl_set_cu_mask, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
			kfd_ioctl_get_queue_wave_state, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
			kfd_ioctl_get_dmabuf_info, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
			kfd_ioctl_import_dmabuf, 0),

};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)

static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
	struct kfd_process *process;
	amdkfd_ioctl_t *func;
	const struct amdkfd_ioctl_desc *ioctl = NULL;
	unsigned int nr = _IOC_NR(cmd);
	char stack_kdata[128];
	char *kdata = NULL;
	unsigned int usize, asize;
	int retcode = -EINVAL;

	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
		goto err_i1;

	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
		u32 amdkfd_size;

		ioctl = &amdkfd_ioctls[nr];

		amdkfd_size = _IOC_SIZE(ioctl->cmd);
		usize = asize = _IOC_SIZE(cmd);
		if (amdkfd_size > asize)
			asize = amdkfd_size;

		cmd = ioctl->cmd;
	} else
		goto err_i1;

	dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);

	process = kfd_get_process(current);
	if (IS_ERR(process)) {
		dev_dbg(kfd_device, "no process\n");
		goto err_i1;
	}

	/* Do not trust userspace, use our own definition */
	func = ioctl->func;

	if (unlikely(!func)) {
		dev_dbg(kfd_device, "no function\n");
		retcode = -EINVAL;
		goto err_i1;
	}

	if (cmd & (IOC_IN | IOC_OUT)) {
		if (asize <= sizeof(stack_kdata)) {
			kdata = stack_kdata;
		} else {
			kdata = kmalloc(asize, GFP_KERNEL);
			if (!kdata) {
				retcode = -ENOMEM;
				goto err_i1;
			}
		}
		if (asize > usize)
			memset(kdata + usize, 0, asize - usize);
	}

	if (cmd & IOC_IN) {
		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
			retcode = -EFAULT;
			goto err_i1;
		}
	} else if (cmd & IOC_OUT) {
		memset(kdata, 0, usize);
	}

	retcode = func(filep, process, kdata);

	if (cmd & IOC_OUT)
		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
			retcode = -EFAULT;

err_i1:
	if (!ioctl)
		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
			task_pid_nr(current), cmd, nr);

	if (kdata != stack_kdata)
		kfree(kdata);

	if (retcode)
		dev_dbg(kfd_device, "ret = %d\n", retcode);

	return retcode;
}

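/*
 * The mmap offset returned to user mode by the ioctls above packs a mapping
 * type and, where relevant, a GPU ID into the upper bits of the page offset
 * (see the KFD_MMAP_* helpers), e.g. for doorbells:
 *
 *	offset = (KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(gpu_id)) << PAGE_SHIFT;
 *
 * kfd_mmap() below undoes this encoding with KFD_MMAP_TYPE_MASK,
 * KFD_MMAP_GPU_ID_GET() and KFD_MMAP_OFFSET_VALUE_GET() to route the
 * request to the doorbell, event page or reserved memory handler.
 */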
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct kfd_process *process;
	struct kfd_dev *dev = NULL;
	unsigned long vm_pgoff;
	unsigned int gpu_id;

	process = kfd_get_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	vm_pgoff = vma->vm_pgoff;
	vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
	gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
	if (gpu_id)
		dev = kfd_device_by_id(gpu_id);

	switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
	case KFD_MMAP_TYPE_DOORBELL:
		if (!dev)
			return -ENODEV;
		return kfd_doorbell_mmap(dev, process, vma);

	case KFD_MMAP_TYPE_EVENTS:
		return kfd_event_mmap(process, vma);

	case KFD_MMAP_TYPE_RESERVED_MEM:
		if (!dev)
			return -ENODEV;
		return kfd_reserved_mem_mmap(dev, process, vma);
	}

	return -EFAULT;
}