/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = kfd_ioctl,
	.open = kfd_open,
	.mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;

int kfd_chardev_init(void)
{
	int err = 0;

	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
	err = kfd_char_dev_major;
	if (err < 0)
		goto err_register_chrdev;

	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
	err = PTR_ERR(kfd_class);
	if (IS_ERR(kfd_class))
		goto err_class_create;

	kfd_device = device_create(kfd_class, NULL,
				   MKDEV(kfd_char_dev_major, 0),
				   NULL, kfd_dev_name);
	err = PTR_ERR(kfd_device);
	if (IS_ERR(kfd_device))
		goto err_device_create;

	return 0;

err_device_create:
	class_destroy(kfd_class);
err_class_create:
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
	return err;
}

void kfd_chardev_exit(void)
{
	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
	class_destroy(kfd_class);
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
}

struct device *kfd_chardev(void)
{
	return kfd_device;
}


static int kfd_open(struct inode *inode, struct file *filep)
{
	struct kfd_process *process;
	bool is_32bit_user_mode;

	if (iminor(inode) != 0)
		return -ENODEV;

	is_32bit_user_mode = in_compat_syscall();

	if (is_32bit_user_mode) {
		dev_warn(kfd_device,
			 "Process %d (32-bit) failed to open /dev/kfd\n"
			 "32-bit processes are not supported by amdkfd\n",
			 current->pid);
		return -EPERM;
	}

	process = kfd_create_process(filep);
	if (IS_ERR(process))
		return PTR_ERR(process);

	if (kfd_is_locked())
		return -EAGAIN;

	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
		process->pasid, process->is_32bit_user_mode);

	return 0;
}

static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
				 void *data)
{
	struct kfd_ioctl_get_version_args *args = data;

	args->major_version = KFD_IOCTL_MAJOR_VERSION;
	args->minor_version = KFD_IOCTL_MINOR_VERSION;

	return 0;
}
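
/*
 * Validate the user-supplied arguments of the create-queue ioctl and copy
 * them into a queue_properties structure: queue percentage and priority
 * limits, the ring base address and size, the read/write pointer addresses,
 * and the optional EOP buffer and context save/restore area. The uapi queue
 * type/format is translated into the internal KFD_QUEUE_TYPE/FORMAT values.
 */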
static int set_queue_properties_from_user(struct queue_properties *q_properties,
				struct kfd_ioctl_create_queue_args *args)
{
	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->read_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access read pointer\n");
		return -EFAULT;
	}

	if (!access_ok(VERIFY_WRITE,
			(const void __user *) args->write_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access write pointer\n");
		return -EFAULT;
	}

	if (args->eop_buffer_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->eop_buffer_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access eop buffer");
		return -EFAULT;
	}

	if (args->ctx_save_restore_address &&
		!access_ok(VERIFY_WRITE,
			(const void __user *) args->ctx_save_restore_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access ctx save restore buffer");
		return -EFAULT;
	}

	q_properties->is_interop = false;
	q_properties->queue_percent = args->queue_percentage;
	q_properties->priority = args->queue_priority;
	q_properties->queue_address = args->ring_base_address;
	q_properties->queue_size = args->ring_size;
	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
	q_properties->ctx_save_restore_area_address =
			args->ctx_save_restore_address;
	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
	q_properties->ctl_stack_size = args->ctl_stack_size;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
		q_properties->type = KFD_QUEUE_TYPE_SDMA;
	else
		return -ENOTSUPP;

	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->format = KFD_QUEUE_FORMAT_AQL;
	else
		q_properties->format = KFD_QUEUE_FORMAT_PM4;

	pr_debug("Queue Percentage: %d, %d\n",
			q_properties->queue_percent, args->queue_percentage);

	pr_debug("Queue Priority: %d, %d\n",
			q_properties->priority, args->queue_priority);

	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
			q_properties->queue_address, args->ring_base_address);

	pr_debug("Queue Size: 0x%llX, %u\n",
			q_properties->queue_size, args->ring_size);

	pr_debug("Queue r/w Pointers: %px, %px\n",
			q_properties->read_ptr,
			q_properties->write_ptr);

	pr_debug("Queue Format: %d\n", q_properties->format);

	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

	pr_debug("Queue CTX save area: 0x%llX\n",
			q_properties->ctx_save_restore_area_address);

	return 0;
}
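
/*
 * Create a user-mode queue: validate the arguments, look up the target
 * device by gpu_id, bind the process to that device and hand the request
 * to the process queue manager. On success the new queue_id and an encoded
 * doorbell mmap offset are returned to user mode.
 */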
static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_queue_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	pr_debug("Creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, args);
	if (err)
		return err;

	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
	dev = kfd_device_by_id(args->gpu_id);
	if (!dev) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto err_bind_process;
	}

	pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
	if (err != 0)
		goto err_create_queue;

	args->queue_id = queue_id;


	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
	args->doorbell_offset <<= PAGE_SHIFT;
	if (KFD_IS_SOC15(dev->device_info->asic_family))
		/* On SOC15 ASICs, doorbell allocation must be
		 * per-device, and independent from the per-process
		 * queue_id. Return the doorbell offset within the
		 * doorbell aperture to user mode.
		 */
		args->doorbell_offset |= q_properties.doorbell_off;

	mutex_unlock(&p->mutex);

	pr_debug("Queue id %d was created successfully\n", args->queue_id);

	pr_debug("Ring buffer address == 0x%016llX\n",
			args->ring_base_address);

	pr_debug("Read ptr address == 0x%016llX\n",
			args->read_pointer_address);

	pr_debug("Write ptr address == 0x%016llX\n",
			args->write_pointer_address);

	return 0;

err_create_queue:
err_bind_process:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_destroy_queue_args *args = data;

	pr_debug("Destroying queue id %d for pasid %d\n",
				args->queue_id,
				p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_destroy_queue(&p->pqm, args->queue_id);

	mutex_unlock(&p->mutex);
	return retval;
}

static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_update_queue_args *args = data;
	struct queue_properties properties;

	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	properties.queue_address = args->ring_base_address;
	properties.queue_size = args->ring_size;
	properties.queue_percent = args->queue_percentage;
	properties.priority = args->queue_priority;

	pr_debug("Updating queue id %d for pasid %d\n",
			args->queue_id, p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	return retval;
}
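
/*
 * Set a compute-unit mask on an existing queue. The mask is passed as an
 * array of 32-bit words (num_cu_mask must be a multiple of 32) and is
 * copied from user space into the queue properties before being applied
 * through the process queue manager.
 */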
static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	const int max_num_cus = 1024;
	struct kfd_ioctl_set_cu_mask_args *args = data;
	struct queue_properties properties;
	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);

	if ((args->num_cu_mask % 32) != 0) {
		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
				args->num_cu_mask);
		return -EINVAL;
	}

	properties.cu_mask_count = args->num_cu_mask;
	if (properties.cu_mask_count == 0) {
		pr_debug("CU mask cannot be 0");
		return -EINVAL;
	}

	/* To prevent an unreasonably large CU mask size, set an arbitrary
	 * limit of max_num_cus bits. We can then just drop any CU mask bits
	 * past max_num_cus bits and just use the first max_num_cus bits.
	 */
	if (properties.cu_mask_count > max_num_cus) {
		pr_debug("CU mask cannot be greater than 1024 bits");
		properties.cu_mask_count = max_num_cus;
		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
	}

	properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
	if (!properties.cu_mask)
		return -ENOMEM;

	retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
	if (retval) {
		pr_debug("Could not copy CU mask from userspace");
		kfree(properties.cu_mask);
		return -EFAULT;
	}

	mutex_lock(&p->mutex);

	retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	if (retval)
		kfree(properties.cu_mask);

	return retval;
}
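
/*
 * Select the default and alternate cache policies (coherent or noncoherent)
 * for a process on a given device, together with the alternate aperture
 * base and size, and program them through the device queue manager.
 */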
static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_memory_policy_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
			 ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		   ? cache_policy_coherent : cache_policy_noncoherent;

	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}
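
/*
 * Set the trap handler for a process on a given device: the trap handler
 * base address (tba_addr) and trap memory address (tma_addr) are programmed
 * into the per-process queue data through the device queue manager.
 */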
static int kfd_ioctl_set_trap_handler(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	if (dev->dqm->ops.set_trap_handler(dev->dqm,
					&pdd->qpd,
					args->tba_addr,
					args->tma_addr))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_register_args *args = data;
	struct kfd_dev *dev;
	struct kfd_dbgmgr *dbgmgr_ptr;
	struct kfd_process_device *pdd;
	bool create_ok;
	long status = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);
	mutex_lock(kfd_get_dbgmgr_mutex());

	/*
	 * make sure that we have a pdd, in case this is the first queue
	 * created for this process
	 */
	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		status = PTR_ERR(pdd);
		goto out;
	}

	if (!dev->dbgmgr) {
		/* In case of a legal call, we have no dbgmgr yet */
		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
		if (create_ok) {
			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
			if (status != 0)
				kfd_dbgmgr_destroy(dbgmgr_ptr);
			else
				dev->dbgmgr = dbgmgr_ptr;
		}
	} else {
		pr_debug("debugger already registered\n");
		status = -EINVAL;
	}

out:
	mutex_unlock(kfd_get_dbgmgr_mutex());
	mutex_unlock(&p->mutex);

	return status;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_unregister_args *args = data;
	struct kfd_dev *dev;
	long status;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev || !dev->dbgmgr)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
	if (!status) {
		kfd_dbgmgr_destroy(dev->dbgmgr);
		dev->dbgmgr = NULL;
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	return status;
}

/*
 * Parse and generate variable size data structure for address watch.
 * Total size of the buffer and # watch points is limited in order
 * to prevent kernel abuse. (This has no bearing on the much smaller HW
 * limitation, which is enforced by the dbgdev module.)
 * Please also note that the watch addresses themselves are not "copied from
 * user", since they are set into the HW from user mode.
 */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_address_watch_args *args = data;
	struct kfd_dev *dev;
	struct dbg_address_watch_info aw_info;
	unsigned char *args_buff;
	long status;
	void __user *cmd_from_user;
	uint64_t watch_mask_value = 0;
	unsigned int args_idx = 0;

	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	/* Validate arguments */

	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
		(cmd_from_user == NULL))
		return -EINVAL;

	/* this is the actual buffer to work with */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	aw_info.process = p;

	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(aw_info.num_watch_points);

	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;

	/*
	 * set watch address base pointer to point on the array base
	 * within args_buff
	 */
	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];

	/* skip over the addresses buffer */
	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		status = -EINVAL;
		goto out;
	}

	watch_mask_value = (uint64_t) args_buff[args_idx];

	if (watch_mask_value > 0) {
		/*
		 * There is an array of masks.
		 * set watch mask base pointer to point on the array base
		 * within args_buff
		 */
		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];

		/* skip over the masks buffer */
		args_idx += sizeof(aw_info.watch_mask) *
				aw_info.num_watch_points;
	} else {
		/* just the NULL mask, set to NULL and skip over it */
		aw_info.watch_mask = NULL;
		args_idx += sizeof(aw_info.watch_mask);
	}

	if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
		status = -EINVAL;
		goto out;
	}

	/* Currently HSA Event is not supported for DBG */
	aw_info.watch_event = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);

	mutex_unlock(kfd_get_dbgmgr_mutex());

out:
	kfree(args_buff);

	return status;
}

/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_wave_control_args *args = data;
	struct kfd_dev *dev;
	struct dbg_wave_control_info wac_info;
	unsigned char *args_buff;
	uint32_t computed_buff_size;
	long status;
	void __user *cmd_from_user;
	unsigned int args_idx = 0;

	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));

	/* we use compact form, independent of the packing attribute value */
	computed_buff_size = sizeof(*args) +
				sizeof(wac_info.mode) +
				sizeof(wac_info.operand) +
				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
				sizeof(wac_info.dbgWave_msg.MemoryVA) +
				sizeof(wac_info.trapId);

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	/* input size must match the computed "compact" size */
	if (args->buf_size_in_bytes != computed_buff_size) {
		pr_debug("size mismatch, computed : actual %u : %u\n",
				args->buf_size_in_bytes, computed_buff_size);
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	if (cmd_from_user == NULL)
		return -EINVAL;

	/* copy the entire buffer from user */

	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	/* move ptr to the start of the "pay-load" area */
	wac_info.process = p;

	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.operand);

	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.mode);

	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.trapId);

	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
					*((uint32_t *)(&args_buff[args_idx]));
	wac_info.dbgWave_msg.MemoryVA = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
			wac_info.process, wac_info.operand,
			wac_info.mode, wac_info.trapId,
			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);

	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);

	pr_debug("Returned status of dbg manager is %ld\n", status);

	mutex_unlock(kfd_get_dbgmgr_mutex());

	kfree(args_buff);

	return status;
}
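
/*
 * Return a snapshot of the GPU clock counter (read through KGD), the raw
 * monotonic CPU time and the boot-time clock, all in nanoseconds. Because
 * the counters are reported in nanoseconds, the system clock frequency is
 * reported as a fixed 1 GHz.
 */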
static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_dev *dev;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev)
		/* Reading GPU clock counter from KGD */
		args->gpu_clock_counter =
			dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
	else
		/* Node without GPU resource */
		args->gpu_clock_counter = 0;

	/* No access to rdtsc. Using raw monotonic time */
	args->cpu_clock_counter = ktime_get_raw_ns();
	args->system_clock_counter = ktime_get_boot_ns();

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;

	return 0;
}


static int kfd_ioctl_get_process_apertures(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_args *args = data;
	struct kfd_process_device_apertures *pAperture;
	struct kfd_process_device *pdd;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	args->num_of_nodes = 0;

	mutex_lock(&p->mutex);

	/* if the process-device list isn't empty */
	if (kfd_has_process_device_data(p)) {
		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			pAperture =
				&args->process_apertures[args->num_of_nodes];
			pAperture->gpu_id = pdd->dev->id;
			pAperture->lds_base = pdd->lds_base;
			pAperture->lds_limit = pdd->lds_limit;
			pAperture->gpuvm_base = pdd->gpuvm_base;
			pAperture->gpuvm_limit = pdd->gpuvm_limit;
			pAperture->scratch_base = pdd->scratch_base;
			pAperture->scratch_limit = pdd->scratch_limit;

			dev_dbg(kfd_device,
				"node id %u\n", args->num_of_nodes);
			dev_dbg(kfd_device,
				"gpu id %u\n", pdd->dev->id);
			dev_dbg(kfd_device,
				"lds_base %llX\n", pdd->lds_base);
			dev_dbg(kfd_device,
				"lds_limit %llX\n", pdd->lds_limit);
			dev_dbg(kfd_device,
				"gpuvm_base %llX\n", pdd->gpuvm_base);
			dev_dbg(kfd_device,
				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
			dev_dbg(kfd_device,
				"scratch_base %llX\n", pdd->scratch_base);
			dev_dbg(kfd_device,
				"scratch_limit %llX\n", pdd->scratch_limit);

			args->num_of_nodes++;

			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
	}

	mutex_unlock(&p->mutex);

	return 0;
}
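
/*
 * Newer variant of the aperture query that is not limited to
 * NUM_OF_SUPPORTED_GPUS nodes. Called with num_of_nodes == 0 it only
 * reports how many nodes exist; called again with a non-zero count it
 * fills a user-allocated array with per-device aperture information.
 */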
static int kfd_ioctl_get_process_apertures_new(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_new_args *args = data;
	struct kfd_process_device_apertures *pa;
	struct kfd_process_device *pdd;
	uint32_t nodes = 0;
	int ret;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can allocate
		 * sufficient memory
		 */
		mutex_lock(&p->mutex);

		if (!kfd_has_process_device_data(p))
			goto out_unlock;

		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			args->num_of_nodes++;
			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd);

		goto out_unlock;
	}

	/* Fill in process-aperture information for all available
	 * nodes, but not more than args->num_of_nodes as that is
	 * the amount of memory allocated by user
	 */
	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
			args->num_of_nodes), GFP_KERNEL);
	if (!pa)
		return -ENOMEM;

	mutex_lock(&p->mutex);

	if (!kfd_has_process_device_data(p)) {
		args->num_of_nodes = 0;
		kfree(pa);
		goto out_unlock;
	}

	/* Run over all pdd of the process */
	pdd = kfd_get_first_process_device_data(p);
	do {
		pa[nodes].gpu_id = pdd->dev->id;
		pa[nodes].lds_base = pdd->lds_base;
		pa[nodes].lds_limit = pdd->lds_limit;
		pa[nodes].gpuvm_base = pdd->gpuvm_base;
		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
		pa[nodes].scratch_base = pdd->scratch_base;
		pa[nodes].scratch_limit = pdd->scratch_limit;

		dev_dbg(kfd_device,
			"gpu id %u\n", pdd->dev->id);
		dev_dbg(kfd_device,
			"lds_base %llX\n", pdd->lds_base);
		dev_dbg(kfd_device,
			"lds_limit %llX\n", pdd->lds_limit);
		dev_dbg(kfd_device,
			"gpuvm_base %llX\n", pdd->gpuvm_base);
		dev_dbg(kfd_device,
			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
		dev_dbg(kfd_device,
			"scratch_base %llX\n", pdd->scratch_base);
		dev_dbg(kfd_device,
			"scratch_limit %llX\n", pdd->scratch_limit);
		nodes++;

		pdd = kfd_get_next_process_device_data(p, pdd);
	} while (pdd && (nodes < args->num_of_nodes));
	mutex_unlock(&p->mutex);

	args->num_of_nodes = nodes;
	ret = copy_to_user(
			(void __user *)args->kfd_process_device_apertures_ptr,
			pa,
			(nodes * sizeof(struct kfd_process_device_apertures)));
	kfree(pa);
	return ret ? -EFAULT : 0;

out_unlock:
	mutex_unlock(&p->mutex);
	return 0;
}

static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_event_args *args = data;
	int err;

	/* For dGPUs the event page is allocated in user mode. The
	 * handle is passed to KFD with the first call to this IOCTL
	 * through the event_page_offset field.
	 */
	if (args->event_page_offset) {
		struct kfd_dev *kfd;
		struct kfd_process_device *pdd;
		void *mem, *kern_addr;
		uint64_t size;

		if (p->signal_page) {
			pr_err("Event page is already set\n");
			return -EINVAL;
		}

		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
		if (!kfd) {
			pr_err("Getting device by id failed in %s\n", __func__);
			return -EINVAL;
		}

		mutex_lock(&p->mutex);
		pdd = kfd_bind_process_to_device(kfd, p);
		if (IS_ERR(pdd)) {
			err = PTR_ERR(pdd);
			goto out_unlock;
		}

		mem = kfd_process_device_translate_handle(pdd,
				GET_IDR_HANDLE(args->event_page_offset));
		if (!mem) {
			pr_err("Can't find BO, offset is 0x%llx\n",
					args->event_page_offset);
			err = -EINVAL;
			goto out_unlock;
		}
		mutex_unlock(&p->mutex);

		err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
				mem, &kern_addr, &size);
		if (err) {
			pr_err("Failed to map event page to kernel\n");
			return err;
		}

		err = kfd_event_page_set(p, kern_addr, size);
		if (err) {
			pr_err("Failed to set event page\n");
			return err;
		}
	}

	err = kfd_event_create(filp, p, args->event_type,
				args->auto_reset != 0, args->node_id,
				&args->event_id, &args->event_trigger_data,
				&args->event_page_offset,
				&args->event_slot_index);

	return err;

out_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_destroy_event_args *args = data;

	return kfd_event_destroy(p, args->event_id);
}

static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_set_event_args *args = data;

	return kfd_set_event(p, args->event_id);
}

static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_reset_event_args *args = data;

	return kfd_reset_event(p, args->event_id);
}

static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_wait_events_args *args = data;
	int err;

	err = kfd_wait_on_events(p, args->num_events,
			(void __user *)args->events_ptr,
			(args->wait_for_all != 0),
			args->timeout, &args->wait_result);

	return err;
}

static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	long err;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_fail;
	}

	pdd->qpd.sh_hidden_private_base = args->va_addr;

	mutex_unlock(&p->mutex);

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
	    pdd->qpd.vmid != 0)
		dev->kfd2kgd->set_scratch_backing_va(
			dev->kgd, args->va_addr, pdd->qpd.vmid);

	return 0;

bind_process_to_device_fail:
	mutex_unlock(&p->mutex);
	return err;
}
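
/*
 * Report the tiling configuration of a device: gb_addr_config, bank and
 * rank counts, and the tile/macro-tile config arrays. The number of array
 * entries copied back is clamped to what the device actually provides.
 */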
static int kfd_ioctl_get_tile_config(struct file *filep,
		struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_tile_config_args *args = data;
	struct kfd_dev *dev;
	struct tile_config config;
	int err = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	dev->kfd2kgd->get_tile_config(dev->kgd, &config);

	args->gb_addr_config = config.gb_addr_config;
	args->num_banks = config.num_banks;
	args->num_ranks = config.num_ranks;

	if (args->num_tile_configs > config.num_tile_configs)
		args->num_tile_configs = config.num_tile_configs;
	err = copy_to_user((void __user *)args->tile_config_ptr,
			config.tile_config_ptr,
			args->num_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_tile_configs = 0;
		return -EFAULT;
	}

	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
		args->num_macro_tile_configs =
				config.num_macro_tile_configs;
	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
			config.macro_tile_config_ptr,
			args->num_macro_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_macro_tile_configs = 0;
		return -EFAULT;
	}

	return 0;
}
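
/*
 * Associate a process device with the VM of a DRM file descriptor passed in
 * by user mode. On success the PDD keeps the reference to the DRM file;
 * on any failure the reference is dropped again.
 */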
static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_acquire_vm_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	struct file *drm_file;
	int ret;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	drm_file = fget(args->drm_fd);
	if (!drm_file)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		ret = -EINVAL;
		goto err_unlock;
	}

	if (pdd->drm_file) {
		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
		goto err_unlock;
	}

	ret = kfd_process_device_init_vm(pdd, drm_file);
	if (ret)
		goto err_unlock;
	/* On success, the PDD keeps the drm_file reference */
	mutex_unlock(&p->mutex);

	return 0;

err_unlock:
	mutex_unlock(&p->mutex);
	fput(drm_file);
	return ret;
}

static bool kfd_dev_is_large_bar(struct kfd_dev *dev)
{
	struct kfd_local_mem_info mem_info;

	if (debug_largebar) {
		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
		return true;
	}

	if (dev->device_info->needs_iommu_device)
		return false;

	dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
	if (mem_info.local_mem_size_private == 0 &&
			mem_info.local_mem_size_public > 0)
		return true;
	return false;
}

static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int idr_handle;
	long err;
	uint64_t offset = args->mmap_offset;
	uint32_t flags = args->flags;

	if (args->size == 0)
		return -EINVAL;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
		!kfd_dev_is_large_bar(dev)) {
		pr_err("Alloc host visible vram on small bar is not allowed\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto err_unlock;
	}

	err = dev->kfd2kgd->alloc_memory_of_gpu(
		dev->kgd, args->va_addr, args->size,
		pdd->vm, (struct kgd_mem **) &mem, &offset,
		flags);

	if (err)
		goto err_unlock;

	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (idr_handle < 0) {
		err = -EFAULT;
		goto err_free;
	}

	mutex_unlock(&p->mutex);

	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
	args->mmap_offset = offset;

	return 0;

err_free:
	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
err_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int ret;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		ret = -EINVAL;
		goto err_unlock;
	}

	mem = kfd_process_device_translate_handle(
		pdd, GET_IDR_HANDLE(args->handle));
	if (!mem) {
		ret = -EINVAL;
		goto err_unlock;
	}

	ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);

	/* If freeing the buffer failed, leave the handle in place for
	 * clean-up during process tear-down.
	 */
	if (!ret)
		kfd_process_device_remove_obj_handle(
			pdd, GET_IDR_HANDLE(args->handle));

err_unlock:
	mutex_unlock(&p->mutex);
	return ret;
}
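
/*
 * Map a previously allocated buffer object into the GPUVM of one or more
 * devices. The handle encodes the owning GPU and an IDR handle; device IDs
 * are read from a user array, and args->n_success records how many mappings
 * completed so an interrupted call can be resumed. After mapping, the page
 * table updates are synced and the TLBs of all affected devices are flushed.
 */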
static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	int i;
	uint32_t *devices_arr = NULL;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			pr_debug("Getting device by id failed for 0x%x\n",
				 devices_arr[i]);
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_bind_process_to_device(peer, p);
		if (IS_ERR(peer_pdd)) {
			err = PTR_ERR(peer_pdd);
			goto get_mem_obj_from_handle_failed;
		}
		err = peer->kfd2kgd->map_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to map to gpu %d/%d\n",
			       i, args->n_devices);
			goto map_memory_to_gpu_failed;
		}
		args->n_success = i+1;
	}

	mutex_unlock(&p->mutex);

	err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Flush TLBs after waiting for the page table updates to complete */
	for (i = 0; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (WARN_ON_ONCE(!peer))
			continue;
		peer_pdd = kfd_get_process_device_data(peer, p);
		if (WARN_ON_ONCE(!peer_pdd))
			continue;
		kfd_flush_tlb(peer_pdd);
	}

	kfree(devices_arr);

	return err;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
sync_memory_failed:
	kfree(devices_arr);

	return err;
}
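
/*
 * Reverse of kfd_ioctl_map_memory_to_gpu: unmap a buffer object from the
 * GPUVM of each device listed in the user-provided device ID array, again
 * using args->n_success to allow resuming a partially completed request.
 */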
static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	uint32_t *devices_arr = NULL, i;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		err = -EINVAL;
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_get_process_device_data(peer, p);
		if (!peer_pdd) {
			err = -ENODEV;
			goto get_mem_obj_from_handle_failed;
		}
		err = dev->kfd2kgd->unmap_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to unmap from gpu %d/%d\n",
			       i, args->n_devices);
			goto unmap_memory_from_gpu_failed;
		}
		args->n_success = i+1;
	}
	kfree(devices_arr);

	mutex_unlock(&p->mutex);

	return 0;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
	kfree(devices_arr);
	return err;
}

#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}

/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
			kfd_ioctl_get_version, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
			kfd_ioctl_create_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
			kfd_ioctl_destroy_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
			kfd_ioctl_set_memory_policy, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
			kfd_ioctl_get_clock_counters, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
			kfd_ioctl_get_process_apertures, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
			kfd_ioctl_update_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
			kfd_ioctl_create_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
			kfd_ioctl_destroy_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
			kfd_ioctl_set_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
			kfd_ioctl_reset_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
			kfd_ioctl_wait_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
			kfd_ioctl_dbg_register, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
			kfd_ioctl_dbg_unregister, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
			kfd_ioctl_dbg_address_watch, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
			kfd_ioctl_dbg_wave_control, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
			kfd_ioctl_set_scratch_backing_va, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
			kfd_ioctl_get_tile_config, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
			kfd_ioctl_set_trap_handler, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
			kfd_ioctl_get_process_apertures_new, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
			kfd_ioctl_acquire_vm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
			kfd_ioctl_alloc_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
			kfd_ioctl_free_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
			kfd_ioctl_map_memory_to_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
			kfd_ioctl_unmap_memory_from_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
			kfd_ioctl_set_cu_mask, 0),

};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
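
/*
 * Generic ioctl dispatcher: look up the handler by _IOC_NR(cmd), then copy
 * the argument structure in and/or out according to the IOC_IN/IOC_OUT
 * direction bits, using a small stack buffer when the (possibly extended)
 * argument size fits in 128 bytes and a heap allocation otherwise.
 */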
static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
	struct kfd_process *process;
	amdkfd_ioctl_t *func;
	const struct amdkfd_ioctl_desc *ioctl = NULL;
	unsigned int nr = _IOC_NR(cmd);
	char stack_kdata[128];
	char *kdata = NULL;
	unsigned int usize, asize;
	int retcode = -EINVAL;

	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
		goto err_i1;

	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
		u32 amdkfd_size;

		ioctl = &amdkfd_ioctls[nr];

		amdkfd_size = _IOC_SIZE(ioctl->cmd);
		usize = asize = _IOC_SIZE(cmd);
		if (amdkfd_size > asize)
			asize = amdkfd_size;

		cmd = ioctl->cmd;
	} else
		goto err_i1;

	dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);

	process = kfd_get_process(current);
	if (IS_ERR(process)) {
		dev_dbg(kfd_device, "no process\n");
		goto err_i1;
	}

	/* Do not trust userspace, use our own definition */
	func = ioctl->func;

	if (unlikely(!func)) {
		dev_dbg(kfd_device, "no function\n");
		retcode = -EINVAL;
		goto err_i1;
	}

	if (cmd & (IOC_IN | IOC_OUT)) {
		if (asize <= sizeof(stack_kdata)) {
			kdata = stack_kdata;
		} else {
			kdata = kmalloc(asize, GFP_KERNEL);
			if (!kdata) {
				retcode = -ENOMEM;
				goto err_i1;
			}
		}
		if (asize > usize)
			memset(kdata + usize, 0, asize - usize);
	}

	if (cmd & IOC_IN) {
		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
			retcode = -EFAULT;
			goto err_i1;
		}
	} else if (cmd & IOC_OUT) {
		memset(kdata, 0, usize);
	}

	retcode = func(filep, process, kdata);

	if (cmd & IOC_OUT)
		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
			retcode = -EFAULT;

err_i1:
	if (!ioctl)
		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
			  task_pid_nr(current), cmd, nr);

	if (kdata != stack_kdata)
		kfree(kdata);

	if (retcode)
		dev_dbg(kfd_device, "ret = %d\n", retcode);

	return retcode;
}
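
/*
 * mmap handler: the upper bits of the file offset encode the mapping type
 * (doorbells, events or reserved memory) and, where applicable, the GPU ID.
 * The type is decoded here and the request is forwarded to the matching
 * mmap helper.
 */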
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct kfd_process *process;
	struct kfd_dev *dev = NULL;
	unsigned long vm_pgoff;
	unsigned int gpu_id;

	process = kfd_get_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	vm_pgoff = vma->vm_pgoff;
	vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
	gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
	if (gpu_id)
		dev = kfd_device_by_id(gpu_id);

	switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
	case KFD_MMAP_TYPE_DOORBELL:
		if (!dev)
			return -ENODEV;
		return kfd_doorbell_mmap(dev, process, vma);

	case KFD_MMAP_TYPE_EVENTS:
		return kfd_event_mmap(process, vma);

	case KFD_MMAP_TYPE_RESERVED_MEM:
		if (!dev)
			return -ENODEV;
		return kfd_reserved_mem_mmap(dev, process, vma);
	}

	return -EFAULT;
}