/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = kfd_ioctl,
	.open = kfd_open,
	.mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;

int kfd_chardev_init(void)
{
	int err = 0;

	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
	err = kfd_char_dev_major;
	if (err < 0)
		goto err_register_chrdev;

	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
	err = PTR_ERR(kfd_class);
	if (IS_ERR(kfd_class))
		goto err_class_create;

	kfd_device = device_create(kfd_class, NULL,
				   MKDEV(kfd_char_dev_major, 0),
				   NULL, kfd_dev_name);
	err = PTR_ERR(kfd_device);
	if (IS_ERR(kfd_device))
		goto err_device_create;

	return 0;

err_device_create:
	class_destroy(kfd_class);
err_class_create:
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
	return err;
}

void kfd_chardev_exit(void)
{
	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
	class_destroy(kfd_class);
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
}

struct device *kfd_chardev(void)
{
	return kfd_device;
}


static int kfd_open(struct inode *inode, struct file *filep)
{
	struct kfd_process *process;
	bool is_32bit_user_mode;

	if (iminor(inode) != 0)
		return -ENODEV;

	is_32bit_user_mode = in_compat_syscall();

	if (is_32bit_user_mode) {
		dev_warn(kfd_device,
			"Process %d (32-bit) failed to open /dev/kfd\n"
			"32-bit processes are not supported by amdkfd\n",
			current->pid);
		return -EPERM;
	}

	process = kfd_create_process(filep);
	if (IS_ERR(process))
		return PTR_ERR(process);

	if (kfd_is_locked())
		return -EAGAIN;

	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
		process->pasid, process->is_32bit_user_mode);

	return 0;
}

static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_get_version_args *args = data;

	args->major_version = KFD_IOCTL_MAJOR_VERSION;
	args->minor_version = KFD_IOCTL_MINOR_VERSION;

	return 0;
}

static int set_queue_properties_from_user(struct queue_properties *q_properties,
				struct kfd_ioctl_create_queue_args *args)
{
	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->read_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access read pointer\n");
		return -EFAULT;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->write_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access write pointer\n");
		return -EFAULT;
	}

	if (args->eop_buffer_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->eop_buffer_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access eop buffer");
		return -EFAULT;
	}

	if (args->ctx_save_restore_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->ctx_save_restore_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access ctx save restore buffer");
		return -EFAULT;
	}

	q_properties->is_interop = false;
	q_properties->queue_percent = args->queue_percentage;
	q_properties->priority = args->queue_priority;
	q_properties->queue_address = args->ring_base_address;
	q_properties->queue_size = args->ring_size;
	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
	q_properties->ctx_save_restore_area_address =
			args->ctx_save_restore_address;
	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
	q_properties->ctl_stack_size = args->ctl_stack_size;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
		q_properties->type = KFD_QUEUE_TYPE_SDMA;
	else
		return -ENOTSUPP;

	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->format = KFD_QUEUE_FORMAT_AQL;
	else
		q_properties->format = KFD_QUEUE_FORMAT_PM4;

	pr_debug("Queue Percentage: %d, %d\n",
			q_properties->queue_percent, args->queue_percentage);

	pr_debug("Queue Priority: %d, %d\n",
			q_properties->priority, args->queue_priority);

	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
			q_properties->queue_address, args->ring_base_address);

	pr_debug("Queue Size: 0x%llX, %u\n",
			q_properties->queue_size, args->ring_size);

	pr_debug("Queue r/w Pointers: %px, %px\n",
			q_properties->read_ptr,
			q_properties->write_ptr);

	pr_debug("Queue Format: %d\n", q_properties->format);

	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

	pr_debug("Queue CTX save area: 0x%llX\n",
			q_properties->ctx_save_restore_area_address);

	return 0;
}
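
/*
 * Illustrative sketch (not driver code): how a user-space runtime might fill
 * struct kfd_ioctl_create_queue_args before issuing AMDKFD_IOC_CREATE_QUEUE.
 * Field names come from the uapi header included above; buffer allocation,
 * alignment and error handling are omitted and the values are only examples.
 *
 *	struct kfd_ioctl_create_queue_args args = {0};
 *
 *	args.gpu_id = gpu_id;			// as reported by the KFD topology
 *	args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE_AQL;
 *	args.ring_base_address = (uintptr_t)ring_buf;	// user VA of the ring
 *	args.ring_size = ring_bytes;			// power of two
 *	args.read_pointer_address = (uintptr_t)&rptr;	// writable uint32_t
 *	args.write_pointer_address = (uintptr_t)&wptr;	// writable uint32_t
 *	args.queue_percentage = KFD_MAX_QUEUE_PERCENTAGE;
 *	args.queue_priority = KFD_MAX_QUEUE_PRIORITY / 2;
 *
 *	if (ioctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args) == 0) {
 *		// args.queue_id and args.doorbell_offset are filled in by KFD
 *	}
 */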

static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_queue_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	pr_debug("Creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, args);
	if (err)
		return err;

	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
	dev = kfd_device_by_id(args->gpu_id);
	if (!dev) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto err_bind_process;
	}

	pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
	if (err != 0)
		goto err_create_queue;

	args->queue_id = queue_id;


	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
	args->doorbell_offset <<= PAGE_SHIFT;
	if (KFD_IS_SOC15(dev->device_info->asic_family))
		/* On SOC15 ASICs, doorbell allocation must be
		 * per-device, and independent from the per-process
		 * queue_id. Return the doorbell offset within the
		 * doorbell aperture to user mode.
		 */
		args->doorbell_offset |= q_properties.doorbell_off;

	mutex_unlock(&p->mutex);

	pr_debug("Queue id %d was created successfully\n", args->queue_id);

	pr_debug("Ring buffer address == 0x%016llX\n",
			args->ring_base_address);

	pr_debug("Read ptr address == 0x%016llX\n",
			args->read_pointer_address);

	pr_debug("Write ptr address == 0x%016llX\n",
			args->write_pointer_address);

	return 0;

err_create_queue:
err_bind_process:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_destroy_queue_args *args = data;

	pr_debug("Destroying queue id %d for pasid %d\n",
			args->queue_id,
			p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_destroy_queue(&p->pqm, args->queue_id);

	mutex_unlock(&p->mutex);
	return retval;
}

static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_update_queue_args *args = data;
	struct queue_properties properties;

	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	properties.queue_address = args->ring_base_address;
	properties.queue_size = args->ring_size;
	properties.queue_percent = args->queue_percentage;
	properties.priority = args->queue_priority;

	pr_debug("Updating queue id %d for pasid %d\n",
			args->queue_id, p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	return retval;
}

static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	const int max_num_cus = 1024;
	struct kfd_ioctl_set_cu_mask_args *args = data;
	struct queue_properties properties;
	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);

	if ((args->num_cu_mask % 32) != 0) {
		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
				args->num_cu_mask);
		return -EINVAL;
	}

	properties.cu_mask_count = args->num_cu_mask;
	if (properties.cu_mask_count == 0) {
		pr_debug("CU mask cannot be 0");
		return -EINVAL;
	}

	/* To prevent an unreasonably large CU mask size, set an arbitrary
	 * limit of max_num_cus bits. We can then just drop any CU mask bits
	 * past max_num_cus bits and just use the first max_num_cus bits.
	 */
	if (properties.cu_mask_count > max_num_cus) {
		pr_debug("CU mask cannot be greater than 1024 bits");
		properties.cu_mask_count = max_num_cus;
		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
	}

	properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
	if (!properties.cu_mask)
		return -ENOMEM;

	retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
	if (retval) {
		pr_debug("Could not copy CU mask from userspace");
		kfree(properties.cu_mask);
		return -EFAULT;
	}

	mutex_lock(&p->mutex);

	retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	if (retval)
		kfree(properties.cu_mask);

	return retval;
}

static int kfd_ioctl_get_queue_wave_state(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_queue_wave_state_args *args = data;
	int r;

	mutex_lock(&p->mutex);

	r = pqm_get_wave_state(&p->pqm, args->queue_id,
			(void __user *)args->ctl_stack_address,
			&args->ctl_stack_used_size,
			&args->save_area_used_size);

	mutex_unlock(&p->mutex);

	return r;
}

static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_memory_policy_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
			 ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT) ?
		cache_policy_coherent : cache_policy_noncoherent;

	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_set_trap_handler(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	if (dev->dqm->ops.set_trap_handler(dev->dqm,
					&pdd->qpd,
					args->tba_addr,
					args->tma_addr))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_register_args *args = data;
	struct kfd_dev *dev;
	struct kfd_dbgmgr *dbgmgr_ptr;
	struct kfd_process_device *pdd;
	bool create_ok;
	long status = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);
	mutex_lock(kfd_get_dbgmgr_mutex());

	/*
	 * Make sure we have a pdd, in case this is the first queue created
	 * for this process.
	 */
	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		status = PTR_ERR(pdd);
		goto out;
	}

	if (!dev->dbgmgr) {
		/* In case of a legal call, we have no dbgmgr yet */
		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
		if (create_ok) {
			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
			if (status != 0)
				kfd_dbgmgr_destroy(dbgmgr_ptr);
			else
				dev->dbgmgr = dbgmgr_ptr;
		}
	} else {
		pr_debug("debugger already registered\n");
		status = -EINVAL;
	}

out:
	mutex_unlock(kfd_get_dbgmgr_mutex());
	mutex_unlock(&p->mutex);

	return status;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_unregister_args *args = data;
	struct kfd_dev *dev;
	long status;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev || !dev->dbgmgr)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
	if (!status) {
		kfd_dbgmgr_destroy(dev->dbgmgr);
		dev->dbgmgr = NULL;
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	return status;
}

/*
 * Parse and generate a variable-size data structure for address watch.
 * The total size of the buffer and the number of watch points are limited
 * in order to prevent kernel abuse. (This has no bearing on the much smaller
 * HW limitation, which is enforced by the dbgdev module.)
 * Please also note that the watch addresses themselves are not copied from
 * user space, since they are programmed into the HW with user-mode values.
 *
 */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_address_watch_args *args = data;
	struct kfd_dev *dev;
	struct dbg_address_watch_info aw_info;
	unsigned char *args_buff;
	long status;
	void __user *cmd_from_user;
	uint64_t watch_mask_value = 0;
	unsigned int args_idx = 0;

	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_address_watch not supported on CZ\n");
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	/* Validate arguments */

	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
		(cmd_from_user == NULL))
		return -EINVAL;

	/* this is the actual buffer to work with */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	aw_info.process = p;

	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(aw_info.num_watch_points);

	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;

	/*
	 * set watch address base pointer to point on the array base
	 * within args_buff
	 */
	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];

	/* skip over the addresses buffer */
	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		status = -EINVAL;
		goto out;
	}

	watch_mask_value = (uint64_t) args_buff[args_idx];

	if (watch_mask_value > 0) {
		/*
		 * There is an array of masks.
		 * set watch mask base pointer to point on the array base
		 * within args_buff
		 */
		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];

		/* skip over the masks buffer */
		args_idx += sizeof(aw_info.watch_mask) *
				aw_info.num_watch_points;
	} else {
		/* just the NULL mask, set to NULL and skip over it */
		aw_info.watch_mask = NULL;
		args_idx += sizeof(aw_info.watch_mask);
	}

	if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
		status = -EINVAL;
		goto out;
	}

	/* Currently HSA Event is not supported for DBG */
	aw_info.watch_event = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);

	mutex_unlock(kfd_get_dbgmgr_mutex());

out:
	kfree(args_buff);

	return status;
}

/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_wave_control_args *args = data;
	struct kfd_dev *dev;
	struct dbg_wave_control_info wac_info;
	unsigned char *args_buff;
	uint32_t computed_buff_size;
	long status;
	void __user *cmd_from_user;
	unsigned int args_idx = 0;

	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));

	/* we use compact form, independent of the packing attribute value */
	computed_buff_size = sizeof(*args) +
				sizeof(wac_info.mode) +
				sizeof(wac_info.operand) +
				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
				sizeof(wac_info.dbgWave_msg.MemoryVA) +
				sizeof(wac_info.trapId);

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	/* input size must match the computed "compact" size */
	if (args->buf_size_in_bytes != computed_buff_size) {
		pr_debug("size mismatch, computed : actual %u : %u\n",
				args->buf_size_in_bytes, computed_buff_size);
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	if (cmd_from_user == NULL)
		return -EINVAL;

	/* copy the entire buffer from user */

	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	/* move ptr to the start of the "pay-load" area */
	wac_info.process = p;

	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.operand);

	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.mode);

	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.trapId);

	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
					*((uint32_t *)(&args_buff[args_idx]));
	wac_info.dbgWave_msg.MemoryVA = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
			wac_info.process, wac_info.operand,
			wac_info.mode, wac_info.trapId,
			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);

	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);

	pr_debug("Returned status of dbg manager is %ld\n", status);

	mutex_unlock(kfd_get_dbgmgr_mutex());

	kfree(args_buff);

	return status;
}
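
/*
 * Note on the clock-counter ioctl below: the CPU and system counters are
 * sampled in nanoseconds (hence the fixed 1 GHz system_clock_freq), while
 * the GPU counter comes straight from KGD. A hedged user-space sketch for
 * measuring elapsed CPU time around some work (field names are from the
 * uapi header; error handling omitted):
 *
 *	struct kfd_ioctl_get_clock_counters_args t0 = { .gpu_id = gpu_id };
 *	struct kfd_ioctl_get_clock_counters_args t1 = { .gpu_id = gpu_id };
 *
 *	ioctl(kfd_fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &t0);
 *	// ... do work ...
 *	ioctl(kfd_fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &t1);
 *	// elapsed ns: t1.cpu_clock_counter - t0.cpu_clock_counter
 */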

static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_dev *dev;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev)
		/* Reading GPU clock counter from KGD */
		args->gpu_clock_counter =
			dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
	else
		/* Node without GPU resource */
		args->gpu_clock_counter = 0;

	/* No access to rdtsc. Using raw monotonic time */
	args->cpu_clock_counter = ktime_get_raw_ns();
	args->system_clock_counter = ktime_get_boot_ns();

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;

	return 0;
}


static int kfd_ioctl_get_process_apertures(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_args *args = data;
	struct kfd_process_device_apertures *pAperture;
	struct kfd_process_device *pdd;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	args->num_of_nodes = 0;

	mutex_lock(&p->mutex);

	/* If the process-device list isn't empty */
	if (kfd_has_process_device_data(p)) {
		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			pAperture =
				&args->process_apertures[args->num_of_nodes];
			pAperture->gpu_id = pdd->dev->id;
			pAperture->lds_base = pdd->lds_base;
			pAperture->lds_limit = pdd->lds_limit;
			pAperture->gpuvm_base = pdd->gpuvm_base;
			pAperture->gpuvm_limit = pdd->gpuvm_limit;
			pAperture->scratch_base = pdd->scratch_base;
			pAperture->scratch_limit = pdd->scratch_limit;

			dev_dbg(kfd_device,
				"node id %u\n", args->num_of_nodes);
			dev_dbg(kfd_device,
				"gpu id %u\n", pdd->dev->id);
			dev_dbg(kfd_device,
				"lds_base %llX\n", pdd->lds_base);
			dev_dbg(kfd_device,
				"lds_limit %llX\n", pdd->lds_limit);
			dev_dbg(kfd_device,
				"gpuvm_base %llX\n", pdd->gpuvm_base);
			dev_dbg(kfd_device,
				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
			dev_dbg(kfd_device,
				"scratch_base %llX\n", pdd->scratch_base);
			dev_dbg(kfd_device,
				"scratch_limit %llX\n", pdd->scratch_limit);

			args->num_of_nodes++;

			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
	}

	mutex_unlock(&p->mutex);

	return 0;
}
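
/*
 * The "new" variant below lifts the NUM_OF_SUPPORTED_GPUS limit of the ioctl
 * above by letting user space size the output buffer itself. The expected
 * calling pattern is two ioctls; a sketch (not driver code, error handling
 * omitted):
 *
 *	struct kfd_ioctl_get_process_apertures_new_args a = {0};
 *
 *	ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &a);
 *	// with num_of_nodes == 0 the first call only returns the node count
 *	buf = calloc(a.num_of_nodes,
 *		     sizeof(struct kfd_process_device_apertures));
 *	a.kfd_process_device_apertures_ptr = (uintptr_t)buf;
 *	ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &a);
 *	// num_of_nodes now holds the number of entries actually written
 */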

static int kfd_ioctl_get_process_apertures_new(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_new_args *args = data;
	struct kfd_process_device_apertures *pa;
	struct kfd_process_device *pdd;
	uint32_t nodes = 0;
	int ret;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can allocate
		 * sufficient memory
		 */
		mutex_lock(&p->mutex);

		if (!kfd_has_process_device_data(p))
			goto out_unlock;

		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			args->num_of_nodes++;
			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd);

		goto out_unlock;
	}

	/* Fill in process-aperture information for all available
	 * nodes, but not more than args->num_of_nodes as that is
	 * the amount of memory allocated by user
	 */
	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
			args->num_of_nodes), GFP_KERNEL);
	if (!pa)
		return -ENOMEM;

	mutex_lock(&p->mutex);

	if (!kfd_has_process_device_data(p)) {
		args->num_of_nodes = 0;
		kfree(pa);
		goto out_unlock;
	}

	/* Run over all pdd of the process */
	pdd = kfd_get_first_process_device_data(p);
	do {
		pa[nodes].gpu_id = pdd->dev->id;
		pa[nodes].lds_base = pdd->lds_base;
		pa[nodes].lds_limit = pdd->lds_limit;
		pa[nodes].gpuvm_base = pdd->gpuvm_base;
		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
		pa[nodes].scratch_base = pdd->scratch_base;
		pa[nodes].scratch_limit = pdd->scratch_limit;

		dev_dbg(kfd_device,
			"gpu id %u\n", pdd->dev->id);
		dev_dbg(kfd_device,
			"lds_base %llX\n", pdd->lds_base);
		dev_dbg(kfd_device,
			"lds_limit %llX\n", pdd->lds_limit);
		dev_dbg(kfd_device,
			"gpuvm_base %llX\n", pdd->gpuvm_base);
		dev_dbg(kfd_device,
			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
		dev_dbg(kfd_device,
			"scratch_base %llX\n", pdd->scratch_base);
		dev_dbg(kfd_device,
			"scratch_limit %llX\n", pdd->scratch_limit);
		nodes++;

		pdd = kfd_get_next_process_device_data(p, pdd);
	} while (pdd && (nodes < args->num_of_nodes));
	mutex_unlock(&p->mutex);

	args->num_of_nodes = nodes;
	ret = copy_to_user(
			(void __user *)args->kfd_process_device_apertures_ptr,
			pa,
			(nodes * sizeof(struct kfd_process_device_apertures)));
	kfree(pa);
	return ret ? -EFAULT : 0;

out_unlock:
	mutex_unlock(&p->mutex);
	return 0;
}

static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_create_event_args *args = data;
	int err;

	/* For dGPUs the event page is allocated in user mode. The
	 * handle is passed to KFD with the first call to this IOCTL
	 * through the event_page_offset field.
	 */
	if (args->event_page_offset) {
		struct kfd_dev *kfd;
		struct kfd_process_device *pdd;
		void *mem, *kern_addr;
		uint64_t size;

		if (p->signal_page) {
			pr_err("Event page is already set\n");
			return -EINVAL;
		}

		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
		if (!kfd) {
			pr_err("Getting device by id failed in %s\n", __func__);
			return -EINVAL;
		}

		mutex_lock(&p->mutex);
		pdd = kfd_bind_process_to_device(kfd, p);
		if (IS_ERR(pdd)) {
			err = PTR_ERR(pdd);
			goto out_unlock;
		}

		mem = kfd_process_device_translate_handle(pdd,
				GET_IDR_HANDLE(args->event_page_offset));
		if (!mem) {
			pr_err("Can't find BO, offset is 0x%llx\n",
					args->event_page_offset);
			err = -EINVAL;
			goto out_unlock;
		}
		mutex_unlock(&p->mutex);

		err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
				mem, &kern_addr, &size);
		if (err) {
			pr_err("Failed to map event page to kernel\n");
			return err;
		}

		err = kfd_event_page_set(p, kern_addr, size);
		if (err) {
			pr_err("Failed to set event page\n");
			return err;
		}
	}

	err = kfd_event_create(filp, p, args->event_type,
				args->auto_reset != 0, args->node_id,
				&args->event_id, &args->event_trigger_data,
				&args->event_page_offset,
				&args->event_slot_index);

	return err;

out_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_destroy_event_args *args = data;

	return kfd_event_destroy(p, args->event_id);
}

static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_set_event_args *args = data;

	return kfd_set_event(p, args->event_id);
}

static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_reset_event_args *args = data;

	return kfd_reset_event(p, args->event_id);
}

static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_wait_events_args *args = data;
	int err;

	err = kfd_wait_on_events(p, args->num_events,
			(void __user *)args->events_ptr,
			(args->wait_for_all != 0),
			args->timeout, &args->wait_result);

	return err;
}

static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	long err;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_fail;
	}

	pdd->qpd.sh_hidden_private_base = args->va_addr;

	mutex_unlock(&p->mutex);

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
	    pdd->qpd.vmid != 0)
		dev->kfd2kgd->set_scratch_backing_va(
			dev->kgd, args->va_addr, pdd->qpd.vmid);

	return 0;

bind_process_to_device_fail:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_get_tile_config(struct file *filep,
		struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_tile_config_args *args = data;
	struct kfd_dev *dev;
	struct tile_config config;
	int err = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	dev->kfd2kgd->get_tile_config(dev->kgd, &config);

	args->gb_addr_config = config.gb_addr_config;
	args->num_banks = config.num_banks;
	args->num_ranks = config.num_ranks;

	if (args->num_tile_configs > config.num_tile_configs)
		args->num_tile_configs = config.num_tile_configs;
	err = copy_to_user((void __user *)args->tile_config_ptr,
			config.tile_config_ptr,
			args->num_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_tile_configs = 0;
		return -EFAULT;
	}

	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
		args->num_macro_tile_configs =
				config.num_macro_tile_configs;
	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
			config.macro_tile_config_ptr,
			args->num_macro_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_macro_tile_configs = 0;
		return -EFAULT;
	}

	return 0;
}
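
/*
 * AMDKFD_IOC_ACQUIRE_VM (handled below) ties the per-process, per-device VM
 * to an existing DRM file descriptor via kfd_process_device_init_vm(), so
 * that KFD and the DRM file share one GPU VM. A hedged user-space sketch;
 * the render-node path is only an example and is not defined in this file:
 *
 *	int drm_fd = open("/dev/dri/renderD128", O_RDWR);
 *	struct kfd_ioctl_acquire_vm_args args = {
 *		.gpu_id = gpu_id,
 *		.drm_fd = drm_fd,
 *	};
 *	ioctl(kfd_fd, AMDKFD_IOC_ACQUIRE_VM, &args);
 *	// on success the pdd keeps a reference on the drm file
 */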

static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_acquire_vm_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	struct file *drm_file;
	int ret;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	drm_file = fget(args->drm_fd);
	if (!drm_file)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		ret = -EINVAL;
		goto err_unlock;
	}

	if (pdd->drm_file) {
		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
		goto err_unlock;
	}

	ret = kfd_process_device_init_vm(pdd, drm_file);
	if (ret)
		goto err_unlock;
	/* On success, the PDD keeps the drm_file reference */
	mutex_unlock(&p->mutex);

	return 0;

err_unlock:
	mutex_unlock(&p->mutex);
	fput(drm_file);
	return ret;
}

bool kfd_dev_is_large_bar(struct kfd_dev *dev)
{
	struct kfd_local_mem_info mem_info;

	if (debug_largebar) {
		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
		return true;
	}

	if (dev->device_info->needs_iommu_device)
		return false;

	dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
	if (mem_info.local_mem_size_private == 0 &&
			mem_info.local_mem_size_public > 0)
		return true;
	return false;
}

static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int idr_handle;
	long err;
	uint64_t offset = args->mmap_offset;
	uint32_t flags = args->flags;

	if (args->size == 0)
		return -EINVAL;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
		!kfd_dev_is_large_bar(dev)) {
		pr_err("Alloc host visible vram on small bar is not allowed\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto err_unlock;
	}

	err = dev->kfd2kgd->alloc_memory_of_gpu(
		dev->kgd, args->va_addr, args->size,
		pdd->vm, (struct kgd_mem **) &mem, &offset,
		flags);

	if (err)
		goto err_unlock;

	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (idr_handle < 0) {
		err = -EFAULT;
		goto err_free;
	}

	mutex_unlock(&p->mutex);

	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
	args->mmap_offset = offset;

	return 0;

err_free:
	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
err_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int ret;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		ret = -EINVAL;
		goto err_unlock;
	}

	mem = kfd_process_device_translate_handle(
		pdd, GET_IDR_HANDLE(args->handle));
	if (!mem) {
		ret = -EINVAL;
		goto err_unlock;
	}

	ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);

	/* If freeing the buffer failed, leave the handle in place for
	 * clean-up during process tear-down.
	 */
	if (!ret)
		kfd_process_device_remove_obj_handle(
			pdd, GET_IDR_HANDLE(args->handle));

err_unlock:
	mutex_unlock(&p->mutex);
	return ret;
}

static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	int i;
	uint32_t *devices_arr = NULL;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			(void __user *)args->device_ids_array_ptr,
			args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			pr_debug("Getting device by id failed for 0x%x\n",
					devices_arr[i]);
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_bind_process_to_device(peer, p);
		if (IS_ERR(peer_pdd)) {
			err = PTR_ERR(peer_pdd);
			goto get_mem_obj_from_handle_failed;
		}
		err = peer->kfd2kgd->map_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to map to gpu %d/%d\n",
					i, args->n_devices);
			goto map_memory_to_gpu_failed;
		}
		args->n_success = i+1;
	}

	mutex_unlock(&p->mutex);

	err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Flush TLBs after waiting for the page table updates to complete */
	for (i = 0; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (WARN_ON_ONCE(!peer))
			continue;
		peer_pdd = kfd_get_process_device_data(peer, p);
		if (WARN_ON_ONCE(!peer_pdd))
			continue;
		kfd_flush_tlb(peer_pdd);
	}

	kfree(devices_arr);

	return err;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
sync_memory_failed:
	kfree(devices_arr);

	return err;
}
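
/*
 * Mapping note (applies to the map/unmap pair around this comment): the
 * buffer handle passed in args->handle is the MAKE_HANDLE(gpu_id, idr_handle)
 * value returned by AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, and args->n_success lets
 * user space resume a partially completed multi-GPU (un)mapping: entries
 * [0, n_success) of the device ID array are treated as already done. A hedged
 * sketch of mapping one BO to two GPUs from user space:
 *
 *	uint32_t gpus[] = { gpu_id_a, gpu_id_b };
 *	struct kfd_ioctl_map_memory_to_gpu_args m = {
 *		.handle = alloc_args.handle,	// from ALLOC_MEMORY_OF_GPU
 *		.device_ids_array_ptr = (uintptr_t)gpus,
 *		.n_devices = 2,
 *	};
 *	ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &m);
 *	// on error, m.n_success tells how many devices were already mapped
 */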

static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	uint32_t *devices_arr = NULL, i;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			(void __user *)args->device_ids_array_ptr,
			args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		err = -EINVAL;
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_get_process_device_data(peer, p);
		if (!peer_pdd) {
			err = -ENODEV;
			goto get_mem_obj_from_handle_failed;
		}
		err = dev->kfd2kgd->unmap_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to unmap from gpu %d/%d\n",
					i, args->n_devices);
			goto unmap_memory_from_gpu_failed;
		}
		args->n_success = i+1;
	}
	kfree(devices_arr);

	mutex_unlock(&p->mutex);

	return 0;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
	kfree(devices_arr);
	return err;
}

#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}

/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
			kfd_ioctl_get_version, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
			kfd_ioctl_create_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
			kfd_ioctl_destroy_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
			kfd_ioctl_set_memory_policy, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
			kfd_ioctl_get_clock_counters, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
			kfd_ioctl_get_process_apertures, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
			kfd_ioctl_update_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
			kfd_ioctl_create_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
			kfd_ioctl_destroy_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
			kfd_ioctl_set_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
			kfd_ioctl_reset_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
			kfd_ioctl_wait_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
			kfd_ioctl_dbg_register, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
			kfd_ioctl_dbg_unregister, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
			kfd_ioctl_dbg_address_watch, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
			kfd_ioctl_dbg_wave_control, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
			kfd_ioctl_set_scratch_backing_va, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
			kfd_ioctl_get_tile_config, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
			kfd_ioctl_set_trap_handler, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
			kfd_ioctl_get_process_apertures_new, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
			kfd_ioctl_acquire_vm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
			kfd_ioctl_alloc_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
			kfd_ioctl_free_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
			kfd_ioctl_map_memory_to_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
			kfd_ioctl_unmap_memory_from_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
			kfd_ioctl_set_cu_mask, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
			kfd_ioctl_get_queue_wave_state, 0)

};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)

static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
	struct kfd_process *process;
	amdkfd_ioctl_t *func;
	const struct amdkfd_ioctl_desc *ioctl = NULL;
	unsigned int nr = _IOC_NR(cmd);
	char stack_kdata[128];
	char *kdata = NULL;
	unsigned int usize, asize;
	int retcode = -EINVAL;

	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
		goto err_i1;

	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
		u32 amdkfd_size;

		ioctl = &amdkfd_ioctls[nr];

		amdkfd_size = _IOC_SIZE(ioctl->cmd);
		usize = asize = _IOC_SIZE(cmd);
		if (amdkfd_size > asize)
			asize = amdkfd_size;

		cmd = ioctl->cmd;
	} else
		goto err_i1;

	dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);

	process = kfd_get_process(current);
	if (IS_ERR(process)) {
		dev_dbg(kfd_device, "no process\n");
		goto err_i1;
	}

	/* Do not trust userspace, use our own definition */
	func = ioctl->func;

	if (unlikely(!func)) {
		dev_dbg(kfd_device, "no function\n");
		retcode = -EINVAL;
		goto err_i1;
	}

	if (cmd & (IOC_IN | IOC_OUT)) {
		if (asize <= sizeof(stack_kdata)) {
			kdata = stack_kdata;
		} else {
			kdata = kmalloc(asize, GFP_KERNEL);
			if (!kdata) {
				retcode = -ENOMEM;
				goto err_i1;
			}
		}
		if (asize > usize)
			memset(kdata + usize, 0, asize - usize);
	}

	if (cmd & IOC_IN) {
		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
			retcode = -EFAULT;
			goto err_i1;
		}
	} else if (cmd & IOC_OUT) {
		memset(kdata, 0, usize);
	}

	retcode = func(filep, process, kdata);

	if (cmd & IOC_OUT)
		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
			retcode = -EFAULT;

err_i1:
	if (!ioctl)
		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
			task_pid_nr(current), cmd, nr);

	if (kdata != stack_kdata)
		kfree(kdata);

	if (retcode)
		dev_dbg(kfd_device, "ret = %d\n", retcode);

	return retcode;
}
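
/*
 * All of the ioctls above share one calling convention: user space passes a
 * single args struct by pointer, kfd_ioctl() copies it in (IOC_IN), dispatches
 * through amdkfd_ioctls[], and copies the same struct back out (IOC_OUT).
 * A minimal user-space sketch, assuming /dev/kfd is already open as kfd_fd:
 *
 *	struct kfd_ioctl_get_version_args ver = {0};
 *
 *	if (ioctl(kfd_fd, AMDKFD_IOC_GET_VERSION, &ver) == 0)
 *		printf("KFD ioctl interface %u.%u\n",
 *		       ver.major_version, ver.minor_version);
 */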

static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct kfd_process *process;
	struct kfd_dev *dev = NULL;
	unsigned long vm_pgoff;
	unsigned int gpu_id;

	process = kfd_get_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	vm_pgoff = vma->vm_pgoff;
	vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
	gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
	if (gpu_id)
		dev = kfd_device_by_id(gpu_id);

	switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
	case KFD_MMAP_TYPE_DOORBELL:
		if (!dev)
			return -ENODEV;
		return kfd_doorbell_mmap(dev, process, vma);

	case KFD_MMAP_TYPE_EVENTS:
		return kfd_event_mmap(process, vma);

	case KFD_MMAP_TYPE_RESERVED_MEM:
		if (!dev)
			return -ENODEV;
		return kfd_reserved_mem_mmap(dev, process, vma);
	}

	return -EFAULT;
}