/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "amdgpu_amdkfd.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = kfd_ioctl,
	.open = kfd_open,
	.mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;

int kfd_chardev_init(void)
{
	int err = 0;

	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
	err = kfd_char_dev_major;
	if (err < 0)
		goto err_register_chrdev;

	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
	err = PTR_ERR(kfd_class);
	if (IS_ERR(kfd_class))
		goto err_class_create;

	kfd_device = device_create(kfd_class, NULL,
				   MKDEV(kfd_char_dev_major, 0),
				   NULL, kfd_dev_name);
	err = PTR_ERR(kfd_device);
	if (IS_ERR(kfd_device))
		goto err_device_create;

	return 0;

err_device_create:
	class_destroy(kfd_class);
err_class_create:
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
	return err;
}

void kfd_chardev_exit(void)
{
	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
	class_destroy(kfd_class);
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
}

struct device *kfd_chardev(void)
{
	return kfd_device;
}


static int kfd_open(struct inode *inode, struct file *filep)
{
	struct kfd_process *process;
	bool is_32bit_user_mode;

	if (iminor(inode) != 0)
		return -ENODEV;

	is_32bit_user_mode = in_compat_syscall();

	if (is_32bit_user_mode) {
		dev_warn(kfd_device,
			 "Process %d (32-bit) failed to open /dev/kfd\n"
			 "32-bit processes are not supported by amdkfd\n",
			 current->pid);
		return -EPERM;
	}

	process = kfd_create_process(filep);
	if (IS_ERR(process))
		return PTR_ERR(process);

	if (kfd_is_locked())
		return -EAGAIN;

	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
		process->pasid, process->is_32bit_user_mode);

	return 0;
}

static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_get_version_args *args = data;

	args->major_version = KFD_IOCTL_MAJOR_VERSION;
	args->minor_version = KFD_IOCTL_MINOR_VERSION;

	return 0;
}

static int set_queue_properties_from_user(struct queue_properties *q_properties,
				struct kfd_ioctl_create_queue_args *args)
{
	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->read_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access read pointer\n");
		return -EFAULT;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->write_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access write pointer\n");
		return -EFAULT;
	}

	if (args->eop_buffer_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->eop_buffer_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access eop buffer");
		return -EFAULT;
	}

	if (args->ctx_save_restore_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->ctx_save_restore_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access ctx save restore buffer");
		return -EFAULT;
	}

	q_properties->is_interop = false;
	q_properties->queue_percent = args->queue_percentage;
	q_properties->priority = args->queue_priority;
	q_properties->queue_address = args->ring_base_address;
	q_properties->queue_size = args->ring_size;
	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
	q_properties->ctx_save_restore_area_address =
			args->ctx_save_restore_address;
	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
	q_properties->ctl_stack_size = args->ctl_stack_size;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
		q_properties->type = KFD_QUEUE_TYPE_SDMA;
	else
		return -ENOTSUPP;

	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->format = KFD_QUEUE_FORMAT_AQL;
	else
		q_properties->format = KFD_QUEUE_FORMAT_PM4;

	pr_debug("Queue Percentage: %d, %d\n",
			q_properties->queue_percent, args->queue_percentage);

	pr_debug("Queue Priority: %d, %d\n",
			q_properties->priority, args->queue_priority);

	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
			q_properties->queue_address, args->ring_base_address);

	pr_debug("Queue Size: 0x%llX, %u\n",
			q_properties->queue_size, args->ring_size);

	pr_debug("Queue r/w Pointers: %px, %px\n",
			q_properties->read_ptr,
			q_properties->write_ptr);

	pr_debug("Queue Format: %d\n", q_properties->format);

	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

	pr_debug("Queue CTX save area: 0x%llX\n",
			q_properties->ctx_save_restore_area_address);

	return 0;
}

static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_queue_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	pr_debug("Creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, args);
	if (err)
		return err;

	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
	dev = kfd_device_by_id(args->gpu_id);
	if (!dev) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto err_bind_process;
	}

	pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
	if (err != 0)
		goto err_create_queue;

	args->queue_id = queue_id;


	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
	args->doorbell_offset <<= PAGE_SHIFT;
	if (KFD_IS_SOC15(dev->device_info->asic_family))
		/* On SOC15 ASICs, doorbell allocation must be
		 * per-device, and independent from the per-process
		 * queue_id. Return the doorbell offset within the
		 * doorbell aperture to user mode.
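		 * q_properties.doorbell_off is expected to be filled in
		 * by pqm_create_queue() above.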
		 */
		args->doorbell_offset |= q_properties.doorbell_off;

	mutex_unlock(&p->mutex);

	pr_debug("Queue id %d was created successfully\n", args->queue_id);

	pr_debug("Ring buffer address == 0x%016llX\n",
			args->ring_base_address);

	pr_debug("Read ptr address == 0x%016llX\n",
			args->read_pointer_address);

	pr_debug("Write ptr address == 0x%016llX\n",
			args->write_pointer_address);

	return 0;

err_create_queue:
err_bind_process:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_destroy_queue_args *args = data;

	pr_debug("Destroying queue id %d for pasid %d\n",
			args->queue_id,
			p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_destroy_queue(&p->pqm, args->queue_id);

	mutex_unlock(&p->mutex);
	return retval;
}

static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_update_queue_args *args = data;
	struct queue_properties properties;

	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	properties.queue_address = args->ring_base_address;
	properties.queue_size = args->ring_size;
	properties.queue_percent = args->queue_percentage;
	properties.priority = args->queue_priority;

	pr_debug("Updating queue id %d for pasid %d\n",
			args->queue_id, p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	return retval;
}

static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	const int max_num_cus = 1024;
	struct kfd_ioctl_set_cu_mask_args *args = data;
	struct queue_properties properties;
	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);

	if ((args->num_cu_mask % 32) != 0) {
		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
				args->num_cu_mask);
		return -EINVAL;
	}

	properties.cu_mask_count = args->num_cu_mask;
	if (properties.cu_mask_count == 0) {
		pr_debug("CU mask cannot be 0");
		return -EINVAL;
	}

	/* To prevent an unreasonably large CU mask size, set an arbitrary
	 * limit of max_num_cus bits. We can then just drop any CU mask bits
	 * past max_num_cus bits and just use the first max_num_cus bits.
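	 * cu_mask_size is clamped accordingly so that only the first
	 * max_num_cus/32 dwords are copied from user space.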
	 */
	if (properties.cu_mask_count > max_num_cus) {
		pr_debug("CU mask cannot be greater than 1024 bits");
		properties.cu_mask_count = max_num_cus;
		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
	}

	properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
	if (!properties.cu_mask)
		return -ENOMEM;

	retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
	if (retval) {
		pr_debug("Could not copy CU mask from userspace");
		kfree(properties.cu_mask);
		return -EFAULT;
	}

	mutex_lock(&p->mutex);

	retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	if (retval)
		kfree(properties.cu_mask);

	return retval;
}

static int kfd_ioctl_get_queue_wave_state(struct file *filep,
					  struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_queue_wave_state_args *args = data;
	int r;

	mutex_lock(&p->mutex);

	r = pqm_get_wave_state(&p->pqm, args->queue_id,
			       (void __user *)args->ctl_stack_address,
			       &args->ctl_stack_used_size,
			       &args->save_area_used_size);

	mutex_unlock(&p->mutex);

	return r;
}

static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_memory_policy_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
			 ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		   ? cache_policy_coherent : cache_policy_noncoherent;

	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_set_trap_handler(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	if (dev->dqm->ops.set_trap_handler(dev->dqm,
					&pdd->qpd,
					args->tba_addr,
					args->tma_addr))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_register_args *args = data;
	struct kfd_dev *dev;
	struct kfd_dbgmgr *dbgmgr_ptr;
	struct kfd_process_device *pdd;
	bool create_ok;
	long status = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);
	mutex_lock(kfd_get_dbgmgr_mutex());

	/*
	 * make sure that we have pdd, in case this is the first queue
	 * created for this process
	 */
	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		status = PTR_ERR(pdd);
		goto out;
	}

	if (!dev->dbgmgr) {
		/* In case of a legal call, we have no dbgmgr yet */
		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
		if (create_ok) {
			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
			if (status != 0)
				kfd_dbgmgr_destroy(dbgmgr_ptr);
			else
				dev->dbgmgr = dbgmgr_ptr;
		}
	} else {
		pr_debug("debugger already registered\n");
		status = -EINVAL;
	}

out:
	mutex_unlock(kfd_get_dbgmgr_mutex());
	mutex_unlock(&p->mutex);

	return status;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_unregister_args *args = data;
	struct kfd_dev *dev;
	long status;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev || !dev->dbgmgr)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
	if (!status) {
		kfd_dbgmgr_destroy(dev->dbgmgr);
		dev->dbgmgr = NULL;
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	return status;
}

/*
 * Parse and generate variable size data structure for address watch.
 * Total size of the buffer and # watch points is limited in order
 * to prevent kernel abuse. (This has no bearing on the much smaller HW
 * limitation, which is enforced by the dbgdev module.)
 * Please also note that the watch addresses themselves are not copied
 * from user space, since they are programmed into the HW with the
 * user-mode values as-is.
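 * The expected layout of the buffer is: number of watch points, watch
 * modes, watch addresses and (optionally) watch masks.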
 *
 */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_address_watch_args *args = data;
	struct kfd_dev *dev;
	struct dbg_address_watch_info aw_info;
	unsigned char *args_buff;
	long status;
	void __user *cmd_from_user;
	uint64_t watch_mask_value = 0;
	unsigned int args_idx = 0;

	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_address_watch not supported on CZ\n");
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	/* Validate arguments */

	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
		(cmd_from_user == NULL))
		return -EINVAL;

	/* this is the actual buffer to work with */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	aw_info.process = p;

	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(aw_info.num_watch_points);

	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;

	/*
	 * set watch address base pointer to point to the array base
	 * within args_buff
	 */
	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];

	/* skip over the addresses buffer */
	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		status = -EINVAL;
		goto out;
	}

	watch_mask_value = (uint64_t) args_buff[args_idx];

	if (watch_mask_value > 0) {
		/*
		 * There is an array of masks.
		 * set watch mask base pointer to point to the array base
		 * within args_buff
		 */
		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];

		/* skip over the masks buffer */
		args_idx += sizeof(aw_info.watch_mask) *
				aw_info.num_watch_points;
	} else {
		/* just the NULL mask, set to NULL and skip over it */
		aw_info.watch_mask = NULL;
		args_idx += sizeof(aw_info.watch_mask);
	}

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		status = -EINVAL;
		goto out;
	}

	/* Currently HSA Event is not supported for DBG */
	aw_info.watch_event = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);

	mutex_unlock(kfd_get_dbgmgr_mutex());

out:
	kfree(args_buff);

	return status;
}

/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_wave_control_args *args = data;
	struct kfd_dev *dev;
	struct dbg_wave_control_info wac_info;
	unsigned char *args_buff;
	uint32_t computed_buff_size;
	long status;
	void __user *cmd_from_user;
	unsigned int args_idx = 0;

	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));

	/* we use compact form, independent of the packing attribute value */
	computed_buff_size = sizeof(*args) +
				sizeof(wac_info.mode) +
				sizeof(wac_info.operand) +
				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
				sizeof(wac_info.dbgWave_msg.MemoryVA) +
				sizeof(wac_info.trapId);

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	/* input size must match the computed "compact" size */
	if (args->buf_size_in_bytes != computed_buff_size) {
		pr_debug("size mismatch, computed : actual %u : %u\n",
				args->buf_size_in_bytes, computed_buff_size);
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	if (cmd_from_user == NULL)
		return -EINVAL;

	/* copy the entire buffer from user */

	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	/* move ptr to the start of the "pay-load" area */
	wac_info.process = p;

	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.operand);

	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.mode);

	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.trapId);

	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
					*((uint32_t *)(&args_buff[args_idx]));
	wac_info.dbgWave_msg.MemoryVA = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
			wac_info.process, wac_info.operand,
			wac_info.mode, wac_info.trapId,
			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);

	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);

	pr_debug("Returned status of dbg manager is %ld\n", status);

	mutex_unlock(kfd_get_dbgmgr_mutex());

	kfree(args_buff);

	return status;
}
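
/*
 * Return the current GPU clock counter together with CPU timestamps
 * (raw monotonic and boot-based), so that user mode can correlate GPU
 * and CPU time. If no GPU is found for gpu_id, the GPU counter is
 * reported as 0.
 */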
static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_dev *dev;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev)
		/* Reading GPU clock counter from KGD */
		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
	else
		/* Node without GPU resource */
		args->gpu_clock_counter = 0;

	/* No access to rdtsc. Using raw monotonic time */
	args->cpu_clock_counter = ktime_get_raw_ns();
	args->system_clock_counter = ktime_get_boot_ns();

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;

	return 0;
}


static int kfd_ioctl_get_process_apertures(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_args *args = data;
	struct kfd_process_device_apertures *pAperture;
	struct kfd_process_device *pdd;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	args->num_of_nodes = 0;

	mutex_lock(&p->mutex);

	/* if the process-device list isn't empty */
	if (kfd_has_process_device_data(p)) {
		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			pAperture =
				&args->process_apertures[args->num_of_nodes];
			pAperture->gpu_id = pdd->dev->id;
			pAperture->lds_base = pdd->lds_base;
			pAperture->lds_limit = pdd->lds_limit;
			pAperture->gpuvm_base = pdd->gpuvm_base;
			pAperture->gpuvm_limit = pdd->gpuvm_limit;
			pAperture->scratch_base = pdd->scratch_base;
			pAperture->scratch_limit = pdd->scratch_limit;

			dev_dbg(kfd_device,
				"node id %u\n", args->num_of_nodes);
			dev_dbg(kfd_device,
				"gpu id %u\n", pdd->dev->id);
			dev_dbg(kfd_device,
				"lds_base %llX\n", pdd->lds_base);
			dev_dbg(kfd_device,
				"lds_limit %llX\n", pdd->lds_limit);
			dev_dbg(kfd_device,
				"gpuvm_base %llX\n", pdd->gpuvm_base);
			dev_dbg(kfd_device,
				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
			dev_dbg(kfd_device,
				"scratch_base %llX\n", pdd->scratch_base);
			dev_dbg(kfd_device,
				"scratch_limit %llX\n", pdd->scratch_limit);

			args->num_of_nodes++;

			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
	}

	mutex_unlock(&p->mutex);

	return 0;
}

static int kfd_ioctl_get_process_apertures_new(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_new_args *args = data;
	struct kfd_process_device_apertures *pa;
	struct kfd_process_device *pdd;
	uint32_t nodes = 0;
	int ret;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can allocate
		 * sufficient memory
		 */
		mutex_lock(&p->mutex);

		if (!kfd_has_process_device_data(p))
			goto out_unlock;

		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			args->num_of_nodes++;
			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd);

		goto out_unlock;
	}

	/* Fill in process-aperture information for all available
	 * nodes, but not more than args->num_of_nodes as that is
	 * the amount of memory allocated by user
	 */
	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
			args->num_of_nodes), GFP_KERNEL);
	if (!pa)
		return -ENOMEM;

	mutex_lock(&p->mutex);

	if (!kfd_has_process_device_data(p)) {
		args->num_of_nodes = 0;
		kfree(pa);
		goto out_unlock;
	}

	/* Run over all pdd of the process */
	pdd = kfd_get_first_process_device_data(p);
	do {
		pa[nodes].gpu_id = pdd->dev->id;
		pa[nodes].lds_base = pdd->lds_base;
		pa[nodes].lds_limit = pdd->lds_limit;
		pa[nodes].gpuvm_base = pdd->gpuvm_base;
		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
		pa[nodes].scratch_base = pdd->scratch_base;
		pa[nodes].scratch_limit = pdd->scratch_limit;

		dev_dbg(kfd_device,
			"gpu id %u\n", pdd->dev->id);
		dev_dbg(kfd_device,
			"lds_base %llX\n", pdd->lds_base);
		dev_dbg(kfd_device,
			"lds_limit %llX\n", pdd->lds_limit);
		dev_dbg(kfd_device,
			"gpuvm_base %llX\n", pdd->gpuvm_base);
		dev_dbg(kfd_device,
			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
		dev_dbg(kfd_device,
			"scratch_base %llX\n", pdd->scratch_base);
		dev_dbg(kfd_device,
			"scratch_limit %llX\n", pdd->scratch_limit);
		nodes++;

		pdd = kfd_get_next_process_device_data(p, pdd);
	} while (pdd && (nodes < args->num_of_nodes));
	mutex_unlock(&p->mutex);

	args->num_of_nodes = nodes;
	ret = copy_to_user(
			(void __user *)args->kfd_process_device_apertures_ptr,
			pa,
			(nodes * sizeof(struct kfd_process_device_apertures)));
	kfree(pa);
	return ret ? -EFAULT : 0;

out_unlock:
	mutex_unlock(&p->mutex);
	return 0;
}

static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_create_event_args *args = data;
	int err;

	/* For dGPUs the event page is allocated in user mode. The
	 * handle is passed to KFD with the first call to this IOCTL
	 * through the event_page_offset field.
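	 * The event page can only be set once; a second attempt with a
	 * non-zero event_page_offset is rejected below.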
	 */
	if (args->event_page_offset) {
		struct kfd_dev *kfd;
		struct kfd_process_device *pdd;
		void *mem, *kern_addr;
		uint64_t size;

		if (p->signal_page) {
			pr_err("Event page is already set\n");
			return -EINVAL;
		}

		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
		if (!kfd) {
			pr_err("Getting device by id failed in %s\n", __func__);
			return -EINVAL;
		}

		mutex_lock(&p->mutex);
		pdd = kfd_bind_process_to_device(kfd, p);
		if (IS_ERR(pdd)) {
			err = PTR_ERR(pdd);
			goto out_unlock;
		}

		mem = kfd_process_device_translate_handle(pdd,
				GET_IDR_HANDLE(args->event_page_offset));
		if (!mem) {
			pr_err("Can't find BO, offset is 0x%llx\n",
			       args->event_page_offset);
			err = -EINVAL;
			goto out_unlock;
		}
		mutex_unlock(&p->mutex);

		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
						mem, &kern_addr, &size);
		if (err) {
			pr_err("Failed to map event page to kernel\n");
			return err;
		}

		err = kfd_event_page_set(p, kern_addr, size);
		if (err) {
			pr_err("Failed to set event page\n");
			return err;
		}
	}

	err = kfd_event_create(filp, p, args->event_type,
				args->auto_reset != 0, args->node_id,
				&args->event_id, &args->event_trigger_data,
				&args->event_page_offset,
				&args->event_slot_index);

	return err;

out_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_destroy_event_args *args = data;

	return kfd_event_destroy(p, args->event_id);
}

static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_set_event_args *args = data;

	return kfd_set_event(p, args->event_id);
}

static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_reset_event_args *args = data;

	return kfd_reset_event(p, args->event_id);
}

static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_wait_events_args *args = data;
	int err;

	err = kfd_wait_on_events(p, args->num_events,
			(void __user *)args->events_ptr,
			(args->wait_for_all != 0),
			args->timeout, &args->wait_result);

	return err;
}

static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	long err;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_fail;
	}

	pdd->qpd.sh_hidden_private_base = args->va_addr;

	mutex_unlock(&p->mutex);

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
	    pdd->qpd.vmid != 0)
		dev->kfd2kgd->set_scratch_backing_va(
			dev->kgd, args->va_addr, pdd->qpd.vmid);

	return 0;

bind_process_to_device_fail:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_get_tile_config(struct file *filep,
		struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_tile_config_args *args = data;
	struct kfd_dev *dev;
	struct tile_config config;
	int err = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	dev->kfd2kgd->get_tile_config(dev->kgd, &config);

	args->gb_addr_config = config.gb_addr_config;
	args->num_banks = config.num_banks;
	args->num_ranks = config.num_ranks;

	if (args->num_tile_configs > config.num_tile_configs)
		args->num_tile_configs = config.num_tile_configs;
	err = copy_to_user((void __user *)args->tile_config_ptr,
			config.tile_config_ptr,
			args->num_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_tile_configs = 0;
		return -EFAULT;
	}

	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
		args->num_macro_tile_configs =
				config.num_macro_tile_configs;
	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
			config.macro_tile_config_ptr,
			args->num_macro_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_macro_tile_configs = 0;
		return -EFAULT;
	}

	return 0;
}

static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_acquire_vm_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	struct file *drm_file;
	int ret;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	drm_file = fget(args->drm_fd);
	if (!drm_file)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		ret = -EINVAL;
		goto err_unlock;
	}

	if (pdd->drm_file) {
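		/* A VM was already acquired for this device; only the
		 * same DRM file descriptor is accepted.
		 */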
		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
		goto err_unlock;
	}

	ret = kfd_process_device_init_vm(pdd, drm_file);
	if (ret)
		goto err_unlock;
	/* On success, the PDD keeps the drm_file reference */
	mutex_unlock(&p->mutex);

	return 0;

err_unlock:
	mutex_unlock(&p->mutex);
	fput(drm_file);
	return ret;
}

bool kfd_dev_is_large_bar(struct kfd_dev *dev)
{
	struct kfd_local_mem_info mem_info;

	if (debug_largebar) {
		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
		return true;
	}

	if (dev->device_info->needs_iommu_device)
		return false;

	amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
	if (mem_info.local_mem_size_private == 0 &&
			mem_info.local_mem_size_public > 0)
		return true;
	return false;
}

static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int idr_handle;
	long err;
	uint64_t offset = args->mmap_offset;
	uint32_t flags = args->flags;

	if (args->size == 0)
		return -EINVAL;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
		!kfd_dev_is_large_bar(dev)) {
		pr_err("Alloc host visible vram on small bar is not allowed\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto err_unlock;
	}

	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		dev->kgd, args->va_addr, args->size,
		pdd->vm, (struct kgd_mem **) &mem, &offset,
		flags);

	if (err)
		goto err_unlock;

	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (idr_handle < 0) {
		err = -EFAULT;
		goto err_free;
	}

	mutex_unlock(&p->mutex);

	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
	args->mmap_offset = offset;

	return 0;

err_free:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
err_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int ret;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		ret = -EINVAL;
		goto err_unlock;
	}

	mem = kfd_process_device_translate_handle(
		pdd, GET_IDR_HANDLE(args->handle));
	if (!mem) {
		ret = -EINVAL;
		goto err_unlock;
	}

	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
						(struct kgd_mem *)mem);

	/* If freeing the buffer failed, leave the handle in place for
	 * clean-up during process tear-down.
	 */
	if (!ret)
		kfd_process_device_remove_obj_handle(
			pdd, GET_IDR_HANDLE(args->handle));

err_unlock:
	mutex_unlock(&p->mutex);
	return ret;
}

static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	int i;
	uint32_t *devices_arr = NULL;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			(void __user *)args->device_ids_array_ptr,
			args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			pr_debug("Getting device by id failed for 0x%x\n",
				 devices_arr[i]);
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_bind_process_to_device(peer, p);
		if (IS_ERR(peer_pdd)) {
			err = PTR_ERR(peer_pdd);
			goto get_mem_obj_from_handle_failed;
		}
		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to map to gpu %d/%d\n",
			       i, args->n_devices);
			goto map_memory_to_gpu_failed;
		}
		args->n_success = i+1;
	}

	mutex_unlock(&p->mutex);

	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Flush TLBs after waiting for the page table updates to complete */
	for (i = 0; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (WARN_ON_ONCE(!peer))
			continue;
		peer_pdd = kfd_get_process_device_data(peer, p);
		if (WARN_ON_ONCE(!peer_pdd))
			continue;
		kfd_flush_tlb(peer_pdd);
	}

	kfree(devices_arr);

	return err;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
sync_memory_failed:
	kfree(devices_arr);

	return err;
}

static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	uint32_t *devices_arr = NULL, i;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			(void __user *)args->device_ids_array_ptr,
			args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		err = -EINVAL;
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_get_process_device_data(peer, p);
		if (!peer_pdd) {
			err = -ENODEV;
			goto get_mem_obj_from_handle_failed;
		}
		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to unmap from gpu %d/%d\n",
			       i, args->n_devices);
			goto unmap_memory_from_gpu_failed;
		}
		args->n_success = i+1;
	}
	kfree(devices_arr);

	mutex_unlock(&p->mutex);

	return 0;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
	kfree(devices_arr);
	return err;
}

#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}

/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
			kfd_ioctl_get_version, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
			kfd_ioctl_create_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
			kfd_ioctl_destroy_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
			kfd_ioctl_set_memory_policy, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
			kfd_ioctl_get_clock_counters, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
			kfd_ioctl_get_process_apertures, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
			kfd_ioctl_update_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
			kfd_ioctl_create_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
			kfd_ioctl_destroy_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
			kfd_ioctl_set_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
			kfd_ioctl_reset_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
			kfd_ioctl_wait_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
			kfd_ioctl_dbg_register, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
			kfd_ioctl_dbg_unregister, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
			kfd_ioctl_dbg_address_watch, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
			kfd_ioctl_dbg_wave_control, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
			kfd_ioctl_set_scratch_backing_va, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
			kfd_ioctl_get_tile_config, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
			kfd_ioctl_set_trap_handler, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
			kfd_ioctl_get_process_apertures_new, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
			kfd_ioctl_acquire_vm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
			kfd_ioctl_alloc_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
			kfd_ioctl_free_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
			kfd_ioctl_map_memory_to_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
			kfd_ioctl_unmap_memory_from_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
			kfd_ioctl_set_cu_mask, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
			kfd_ioctl_get_queue_wave_state, 0)

};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)

static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
	struct kfd_process *process;
	amdkfd_ioctl_t *func;
	const struct amdkfd_ioctl_desc *ioctl = NULL;
	unsigned int nr = _IOC_NR(cmd);
	char stack_kdata[128];
	char *kdata = NULL;
	unsigned int usize, asize;
	int retcode = -EINVAL;

	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
		goto err_i1;

	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
		u32 amdkfd_size;

		ioctl = &amdkfd_ioctls[nr];

		amdkfd_size = _IOC_SIZE(ioctl->cmd);
		usize = asize = _IOC_SIZE(cmd);
		if (amdkfd_size > asize)
			asize = amdkfd_size;

		cmd = ioctl->cmd;
	} else
		goto err_i1;

	dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);

	process = kfd_get_process(current);
	if (IS_ERR(process)) {
		dev_dbg(kfd_device, "no process\n");
		goto err_i1;
	}

	/* Do not trust userspace, use our own definition */
	func = ioctl->func;

	if (unlikely(!func)) {
		dev_dbg(kfd_device, "no function\n");
		retcode = -EINVAL;
		goto err_i1;
	}

	if (cmd & (IOC_IN | IOC_OUT)) {
		if (asize <= sizeof(stack_kdata)) {
			kdata = stack_kdata;
		} else {
			kdata = kmalloc(asize, GFP_KERNEL);
			if (!kdata) {
				retcode = -ENOMEM;
				goto err_i1;
			}
		}
		if (asize > usize)
			memset(kdata + usize, 0, asize - usize);
	}

	if (cmd & IOC_IN) {
		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
			retcode = -EFAULT;
			goto err_i1;
		}
	} else if (cmd & IOC_OUT) {
		memset(kdata, 0, usize);
	}

	retcode = func(filep, process, kdata);

	if (cmd & IOC_OUT)
		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
			retcode = -EFAULT;

err_i1:
	if (!ioctl)
		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
			task_pid_nr(current), cmd, nr);

	if (kdata != stack_kdata)
		kfree(kdata);

	if (retcode)
		dev_dbg(kfd_device, "ret = %d\n", retcode);

	return retcode;
}

static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct kfd_process *process;
	struct kfd_dev *dev = NULL;
	unsigned long vm_pgoff;
	unsigned int gpu_id;

	process = kfd_get_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	vm_pgoff = vma->vm_pgoff;
	vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
	gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
	if (gpu_id)
		dev = kfd_device_by_id(gpu_id);

	switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
	case KFD_MMAP_TYPE_DOORBELL:
		if (!dev)
			return -ENODEV;
		return kfd_doorbell_mmap(dev, process, vma);

	case KFD_MMAP_TYPE_EVENTS:
		return kfd_event_mmap(process, vma);

	case KFD_MMAP_TYPE_RESERVED_MEM:
		if (!dev)
			return -ENODEV;
		return kfd_reserved_mem_mmap(dev, process, vma);
	}

	return -EFAULT;
}